Import Arm Optimized Routines v21.02

This is the new replacement for the existing cortex-strings code which will
be replaced in a follow-up commit.
We should also be able to use some of the math functions to allow the
tests to pass on AArch64 (and other architectures) instead of just x86.
We might also be able to reuse some of the tests for the kyua testsuite.

Imported using
```
curl -L e823e3abf5 | tar --strip-components=1 -xvzf -
git add .
```

Differential Revision: https://reviews.freebsd.org/D29035
git-subtree-dir: contrib/arm-optimized-routines
git-subtree-mainline: e34c713b0e
git-subtree-split: f9f37c002a
This commit is contained in:
Alex Richardson 2021-07-06 11:02:44 +01:00
commit 31914882fc
208 changed files with 78517 additions and 0 deletions

View file

@ -0,0 +1,3 @@
build/
.DS_Store
config.mk

View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 1999-2019, Arm Limited.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,89 @@
# Makefile - requires GNU make
#
# Copyright (c) 2018-2020, Arm Limited.
# SPDX-License-Identifier: MIT

# Top-level, non-recursive build.  Each subproject listed in SUBS supplies a
# $(srcdir)/<sub>/Dir.mk fragment that is included below and hooks its own
# all-/check-/clean-/install- targets into the generic ones defined here.

# Root of the source tree; set it in config.mk for out-of-tree builds.
srcdir = .

# Installation layout.  DESTDIR is prepended by the install rules.
prefix = /usr
bindir = $(prefix)/bin
libdir = $(prefix)/lib
includedir = $(prefix)/include

# Configure these in config.mk, do not make changes in this file.
# The assignments are intentionally recursive (=): config.mk is included
# further down, and values it sets (CROSS_COMPILE, CFLAGS, ...) must still
# be picked up when CFLAGS_ALL, AR and RANLIB are expanded.
SUBS = math string networking
HOST_CC = cc
HOST_CFLAGS = -std=c99 -O2
HOST_LDFLAGS =
HOST_LDLIBS =
EMULATOR =
CPPFLAGS =
CFLAGS = -std=c99 -O2
CFLAGS_SHARED = -fPIC
CFLAGS_ALL = -Ibuild/include $(CPPFLAGS) $(CFLAGS)
LDFLAGS =
LDLIBS =
AR = $(CROSS_COMPILE)ar
RANLIB = $(CROSS_COMPILE)ranlib
INSTALL = install

# Declare 'all' before any include so it stays the default goal.
all:

# Remove a target whose recipe failed so a partial file never looks fresh.
.DELETE_ON_ERROR:

# Optional user configuration; the leading '-' tolerates a missing file.
-include config.mk

# Pull in the build fragment of every selected subproject.
$(foreach sub,$(SUBS),$(eval include $(srcdir)/$(sub)/Dir.mk))

# Required targets of subproject foo:
#   all-foo
#   check-foo
#   clean-foo
#   install-foo
# Required make variables of subproject foo:
#   foo-files: Built files (all in build/).
# Make variables used by subproject foo:
#   foo-...: Variables defined in foo/Dir.mk or by config.mk.

all: $(SUBS:%=all-%)

# Every built file, and the set of directories they live in.
ALL_FILES = $(foreach sub,$(SUBS),$($(sub)-files))
DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_FILES))))

# Output directories are order-only prerequisites: they must exist, but
# their timestamps never trigger a rebuild.
$(ALL_FILES): | $(DIRS)

$(DIRS):
	mkdir -p $@

# Objects destined for shared libraries (.os) get the PIC flags on top.
$(filter %.os,$(ALL_FILES)): CFLAGS_ALL += $(CFLAGS_SHARED)

build/%.o: $(srcdir)/%.S
	$(CC) $(CFLAGS_ALL) -c -o $@ $<

build/%.o: $(srcdir)/%.c
	$(CC) $(CFLAGS_ALL) -c -o $@ $<

build/%.os: $(srcdir)/%.S
	$(CC) $(CFLAGS_ALL) -c -o $@ $<

build/%.os: $(srcdir)/%.c
	$(CC) $(CFLAGS_ALL) -c -o $@ $<

clean: $(SUBS:%=clean-%)
	rm -rf build

distclean: clean
	rm -f config.mk

# Install rules: executables and shared objects keep install's default
# mode, other library files and headers are installed as 644.
$(DESTDIR)$(bindir)/%: build/bin/%
	$(INSTALL) -D $< $@

$(DESTDIR)$(libdir)/%.so: build/lib/%.so
	$(INSTALL) -D $< $@

$(DESTDIR)$(libdir)/%: build/lib/%
	$(INSTALL) -m 644 -D $< $@

$(DESTDIR)$(includedir)/%: build/include/%
	$(INSTALL) -m 644 -D $< $@

install: $(SUBS:%=install-%)

check: $(SUBS:%=check-%)

.PHONY: all clean distclean install check

View file

@ -0,0 +1,56 @@
Arm Optimized Routines
----------------------
This repository contains implementations of library functions
provided by Arm under the MIT License (see LICENSE). Contributions
to this project are accepted, but contributors have to sign an
Assignment Agreement; please follow the instructions in
contributor-agreement.pdf. This is needed so that upstreaming code
to projects that require copyright assignment is possible.
Regular quarterly releases are tagged as vYY.MM, the latest
release is v20.11.
Source code layout:
build/ - build directory (created by make).
math/ - math subproject sources.
math/include/ - math library public headers.
math/test/ - math test and benchmark related sources.
math/tools/ - tools used for designing the algorithms.
networking/ - networking subproject sources.
networking/include/ - networking library public headers.
networking/test/ - networking test and benchmark related sources.
string/ - string routines subproject sources.
string/include/ - string library public headers.
string/test/ - string test and benchmark related sources.
The steps to build the target libraries and run the tests:
cp config.mk.dist config.mk
# edit config.mk if necessary ...
make
make check
Or building outside of the source directory:
ln -s path/to/src/Makefile Makefile
cp path/to/src/config.mk.dist config.mk
echo 'srcdir = path/to/src' >> config.mk
# further edits to config.mk
make
make check
Or building and testing the math subproject only:
make all-math
make check-math
The test system requires libmpfr and libmpc.
For example on debian linux they can be installed as:
sudo apt-get install libmpfr-dev libmpc-dev
For cross build, CROSS_COMPILE should be set in config.mk and EMULATOR
should be set for cross testing (e.g. using qemu-user or remote access
to a target machine), see the examples in config.mk.dist.

View file

@ -0,0 +1,73 @@
# Example config.mk
#
# Copyright (c) 2018-2020, Arm Limited.
# SPDX-License-Identifier: MIT
#
# Copy this file to config.mk and edit it; the settings below are plain
# GNU make variable assignments.
# Subprojects to build
SUBS = math string networking
# Target architecture: aarch64, arm or x86_64
ARCH = aarch64
# Use for cross compilation with gcc.
# The prefix is prepended to the compiler name in CC below.
#CROSS_COMPILE = aarch64-none-linux-gnu-
# Compiler for the target
CC = $(CROSS_COMPILE)gcc
CFLAGS = -std=c99 -pipe -O3
CFLAGS += -Wall -Wno-missing-braces
CFLAGS += -Werror=implicit-function-declaration
# Used for test case generator that is executed on the host
HOST_CC = gcc
HOST_CFLAGS = -std=c99 -O2
HOST_CFLAGS += -Wall -Wno-unused-function
# Enable debug info.
HOST_CFLAGS += -g
CFLAGS += -g
# Optimize the shared libraries on aarch64 assuming they fit in 1M.
#CFLAGS_SHARED = -fPIC -mcmodel=tiny
# Enable MTE support.
#CFLAGS += -march=armv8.5-a+memtag -DWANT_MTE_TEST=1
# Use with cross testing.
# EMULATOR is a command prefix for running target binaries.  The second
# example copies the binary to a remote host and runs it there over ssh;
# '$$' is how a literal '$' is written in a makefile.
#EMULATOR = qemu-aarch64-static
#EMULATOR = sh -c 'scp $$1 user@host:/dir && ssh user@host /dir/"$$@"' --
# Additional flags for subprojects.
# They start out empty and are extended with += further down.
math-cflags =
math-ldlibs =
math-ulpflags =
math-testflags =
string-cflags =
networking-cflags =
# Use if mpfr is available on the target for ulp error checking.
#math-ldlibs += -lmpfr -lgmp
#math-cflags += -DUSE_MPFR
# Use with gcc.
math-cflags += -frounding-math -fexcess-precision=standard -fno-stack-protector
math-cflags += -ffp-contract=fast -fno-math-errno
# Use with clang.
#math-cflags += -ffp-contract=fast
# Disable vector math code
#math-cflags += -DWANT_VMATH=0
# Disable fenv checks
#math-ulpflags = -q -f
#math-testflags = -nostatus
# Remove GNU Property Notes from asm files.
#string-cflags += -DWANT_GNU_PROPERTY=0
# Enable assertion checks.
#networking-cflags += -DWANT_ASSERT
# Avoid auto-vectorization of scalar code and unroll loops
networking-cflags += -O2 -fno-tree-vectorize -funroll-loops

View file

@ -0,0 +1,110 @@
# Makefile fragment - requires GNU make
#
# Copyright (c) 2019, Arm Limited.
# SPDX-License-Identifier: MIT

# Build rules of the math subproject.  This fragment defines all-math,
# check-math, install-math and clean-math together with math-files.
#
# Lists below deliberately do NOT end with a trailing backslash: a dangling
# line continuation silently swallows whatever line follows it unless a
# blank line happens to separate the two.

# S: math source directory, B: matching object directory under build/.
# NOTE(review): presumably other Dir.mk fragments reuse the names S and B,
# so they are only used where make expands them immediately (targets,
# prerequisites, := assignments), never inside recipes - confirm.
S := $(srcdir)/math
B := build/math

math-lib-srcs := $(wildcard $(S)/*.[cS])
math-test-srcs := \
	$(S)/test/mathtest.c \
	$(S)/test/mathbench.c \
	$(S)/test/ulp.c

# Sources of the random test generator; compiled with the host compiler.
math-test-host-srcs := $(wildcard $(S)/test/rtest/*.[cS])

# Public headers are copied from math/include to build/include.
math-includes := $(patsubst $(S)/%,build/%,$(wildcard $(S)/include/*.h))

math-libs := \
	build/lib/libmathlib.so \
	build/lib/libmathlib.a

math-tools := \
	build/bin/mathtest \
	build/bin/mathbench \
	build/bin/mathbench_libc \
	build/bin/runulp.sh \
	build/bin/ulp

math-host-tools := \
	build/bin/rtest

math-lib-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-lib-srcs)))
math-test-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-srcs)))
math-host-objs := $(patsubst $(S)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
math-target-objs := $(math-lib-objs) $(math-test-objs)
# Every target object exists twice: .o (static) and .os (PIC, shared).
math-objs := $(math-target-objs) $(math-target-objs:%.o=%.os) $(math-host-objs)

math-files := \
	$(math-objs) \
	$(math-libs) \
	$(math-tools) \
	$(math-host-tools) \
	$(math-includes)

all-math: $(math-libs) $(math-tools) $(math-includes)

# Objects are built after the public headers have been copied to build/.
$(math-objs): $(math-includes)

$(math-objs): CFLAGS_ALL += $(math-cflags)
$(B)/test/mathtest.o: CFLAGS_ALL += -fmath-errno
# Host objects replace the compiler and the whole flag set.
$(math-host-objs): CC = $(HOST_CC)
$(math-host-objs): CFLAGS_ALL = $(HOST_CFLAGS)

$(B)/test/ulp.o: $(S)/test/ulp.h

build/lib/libmathlib.so: $(math-lib-objs:%.o=%.os)
	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -shared -o $@ $^

# Remove the archive first so members of deleted sources do not linger.
build/lib/libmathlib.a: $(math-lib-objs)
	rm -f $@
	$(AR) rc $@ $^
	$(RANLIB) $@

$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
$(math-tools): LDLIBS += $(math-ldlibs) -lm

build/bin/rtest: $(math-host-objs)
	$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)

build/bin/mathtest: $(B)/test/mathtest.o build/lib/libmathlib.a
	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)

build/bin/mathbench: $(B)/test/mathbench.o build/lib/libmathlib.a
	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)

# This is not ideal, but allows custom symbols in mathbench to get resolved.
# Only the object ($<) is linked up front; libmathlib.a is named after -lc.
build/bin/mathbench_libc: $(B)/test/mathbench.o build/lib/libmathlib.a
	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/lib/libmathlib.a -lm

build/bin/ulp: $(B)/test/ulp.o build/lib/libmathlib.a
	$(CC) $(CFLAGS_ALL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)

build/include/%.h: $(S)/include/%.h
	cp $< $@

build/bin/%.sh: $(S)/test/%.sh
	cp $< $@

math-tests := $(wildcard $(S)/test/testcases/directed/*.tst)
math-rtests := $(wildcard $(S)/test/testcases/random/*.tst)

# Directed tests: the .tst files are fed straight to mathtest.
check-math-test: $(math-tools)
	cat $(math-tests) | $(EMULATOR) build/bin/mathtest $(math-testflags)

# Random tests: rtest runs on the host and its output is piped to mathtest.
check-math-rtest: $(math-host-tools) $(math-tools)
	cat $(math-rtests) | build/bin/rtest | $(EMULATOR) build/bin/mathtest $(math-testflags)

# runulp.sh receives the emulator command as arguments and the ulp tool
# flags through the ULPFLAGS environment variable.
check-math-ulp: $(math-tools)
	ULPFLAGS="$(math-ulpflags)" build/bin/runulp.sh $(EMULATOR)

check-math: check-math-test check-math-rtest check-math-ulp

# Only the libraries and public headers are installed, not the test tools.
install-math: \
	$(math-libs:build/lib/%=$(DESTDIR)$(libdir)/%) \
	$(math-includes:build/include/%=$(DESTDIR)$(includedir)/%)

clean-math:
	rm -f $(math-files)

.PHONY: all-math check-math-test check-math-rtest check-math-ulp check-math install-math clean-math

View file

@ -0,0 +1,63 @@
/*
* Single-precision cos function.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include <math.h>
#include "math_config.h"
#include "sincosf.h"
/* Fast single-precision cosine.

   Worst-case ULP is 0.5607, maximum relative error is 0.5303 * 2^-23.
   The argument is classified by magnitude:
     - below pio4: no reduction, and 1.0f directly below 2^-12;
     - below 120: single-step range reduction (reduce_fast);
     - other finite values: range reduction using fast integer arithmetic
       (reduce_large);
     - infinity and NaN: handed to __math_invalidf.  */
float
cosf (float y)
{
  const sincos_t *tbl = &__sincosf_table[0];
  double xd = y;
  double sgn;
  int quad;

  if (abstop12 (y) < abstop12 (pio4))
    {
      double xsq = xd * xd;

      if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
        return 1.0f;

      return sinf_poly (xd, xsq, tbl, 1);
    }

  if (likely (abstop12 (y) < abstop12 (120.0f)))
    {
      xd = reduce_fast (xd, tbl, &quad);

      /* Pick the sign from the current table before possibly switching to
         the second table.  */
      sgn = tbl->sign[quad & 3];
      if (quad & 2)
        tbl = &__sincosf_table[1];

      return sinf_poly (xd * sgn, xd * xd, tbl, quad ^ 1);
    }

  if (abstop12 (y) < abstop12 (INFINITY))
    {
      uint32_t bits = asuint (y);
      int neg = bits >> 31;
      int q;

      xd = reduce_large (bits, &quad);

      /* The sign of the original input is folded into the quadrant used
         for the sign and table selection.  */
      q = quad + neg;
      sgn = tbl->sign[q & 3];
      if (q & 2)
        tbl = &__sincosf_table[1];

      return sinf_poly (xd * sgn, xd * xd, tbl, quad ^ 1);
    }

  return __math_invalidf (y);
}

View file

@ -0,0 +1,244 @@
/*
* Double-precision erf(x) function.
*
* Copyright (c) 2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
#include <math.h>
#include <stdint.h>
#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
#define C 0x1.b0ac16p-1
#define PA __erf_data.erf_poly_A
#define NA __erf_data.erf_ratio_N_A
#define DA __erf_data.erf_ratio_D_A
#define NB __erf_data.erf_ratio_N_B
#define DB __erf_data.erf_ratio_D_B
#define PC __erf_data.erfc_poly_C
#define PD __erf_data.erfc_poly_D
#define PE __erf_data.erfc_poly_E
#define PF __erf_data.erfc_poly_F
/* Upper 32 bits of the bit pattern of X, as returned by asuint64.  */
static inline uint32_t
top32 (double x)
{
  return (uint32_t) (asuint64 (x) >> 32);
}
/* Fast erf implementation using a mix of
rational and polynomial approximations.
Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.

The branch is selected by comparing the top 32 bits of |x| with the bit
pattern of each interval boundary:
|x| < 2^-28             x + TwoOverSqrtPiMinusOne * x
|x| < 0.5               polynomial in x^2 (PA)
|x| < 0.84375           rational function in x^2 (NA / DA)
|x| < 1.25              C + rational function in |x| - 1 (NB / DB)
|x| < 2, 3.25, 4, 5.90625
1 - polynomial (PC, PD, PE, PF)
otherwise               +-1, or the inf/nan special case.
For negative x the result is negated.  */
double
erf (double x)
{
/* Get top word and sign. */
uint32_t ix = top32 (x);
/* ia is the top word of |x|: the sign bit is masked off. */
uint32_t ia = ix & 0x7fffffff;
uint32_t sign = ix >> 31;
/* Normalized and subnormal cases */
if (ia < 0x3feb0000)
{ /* a = |x| < 0.84375. */
if (ia < 0x3e300000)
{ /* a < 2^(-28). */
if (ia < 0x00800000)
{ /* a < 2^(-1015). */
/* A single fma gives one rounding; check_uflow post-processes the
result for the underflow case. */
double y = fma (TwoOverSqrtPiMinusOne, x, x);
return check_uflow (y);
}
return x + TwoOverSqrtPiMinusOne * x;
}
double x2 = x * x;
if (ia < 0x3fe00000)
{ /* a < 0.5 - Use polynomial approximation. */
/* The ten PA coefficients are paired into linear terms in x2, which are
then combined by a Horner recurrence in x4. */
double r1 = fma (x2, PA[1], PA[0]);
double r2 = fma (x2, PA[3], PA[2]);
double r3 = fma (x2, PA[5], PA[4]);
double r4 = fma (x2, PA[7], PA[6]);
double r5 = fma (x2, PA[9], PA[8]);
double x4 = x2 * x2;
double r = r5;
r = fma (x4, r, r4);
r = fma (x4, r, r3);
r = fma (x4, r, r2);
r = fma (x4, r, r1);
return fma (r, x, x); /* This fma is crucial for accuracy. */
}
else
{ /* 0.5 <= a < 0.84375 - Use rational approximation. */
/* P and Q are the numerator and denominator polynomials in x2; the
denominator has an implicit leading coefficient of 1.0. */
double x4, x8, r1n, r2n, r1d, r2d, r3d;
r1n = fma (x2, NA[1], NA[0]);
x4 = x2 * x2;
r2n = fma (x2, NA[3], NA[2]);
x8 = x4 * x4;
r1d = fma (x2, DA[0], 1.0);
r2d = fma (x2, DA[2], DA[1]);
r3d = fma (x2, DA[4], DA[3]);
double P = r1n + x4 * r2n + x8 * NA[4];
double Q = r1d + x4 * r2d + x8 * r3d;
return fma (P / Q, x, x);
}
}
else if (ia < 0x3ff40000)
{ /* 0.84375 <= |x| < 1.25. */
/* Rational approximation in a = |x| - 1, added to the constant C. */
double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
double a = fabs (x) - 1.0;
r1n = fma (a, NB[1], NB[0]);
a2 = a * a;
r1d = fma (a, DB[0], 1.0);
a4 = a2 * a2;
r2n = fma (a, NB[3], NB[2]);
a6 = a4 * a2;
r2d = fma (a, DB[2], DB[1]);
r3n = fma (a, NB[5], NB[4]);
r3d = fma (a, DB[4], DB[3]);
r4n = NB[6];
r4d = DB[5];
double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
if (sign)
return -C - P / Q;
else
return C + P / Q;
}
else if (ia < 0x40000000)
{ /* 1.25 <= |x| < 2.0. */
/* r is a degree-15 polynomial in a = |x| - 1.25 built from the erfc_poly_C
coefficients; the result is 1 - r with the sign of x. */
double a = fabs (x);
a = a - 1.25;
double r1 = fma (a, PC[1], PC[0]);
double r2 = fma (a, PC[3], PC[2]);
double r3 = fma (a, PC[5], PC[4]);
double r4 = fma (a, PC[7], PC[6]);
double r5 = fma (a, PC[9], PC[8]);
double r6 = fma (a, PC[11], PC[10]);
double r7 = fma (a, PC[13], PC[12]);
double r8 = fma (a, PC[15], PC[14]);
double a2 = a * a;
double r = r8;
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x400a0000)
{ /* 2 <= |x| < 3.25. */
/* Same scheme with the erfc_poly_D coefficients; the polynomial variable
is a = |x| / 2 - 1. */
double a = fabs (x);
a = fma (0.5, a, -1.0);
double r1 = fma (a, PD[1], PD[0]);
double r2 = fma (a, PD[3], PD[2]);
double r3 = fma (a, PD[5], PD[4]);
double r4 = fma (a, PD[7], PD[6]);
double r5 = fma (a, PD[9], PD[8]);
double r6 = fma (a, PD[11], PD[10]);
double r7 = fma (a, PD[13], PD[12]);
double r8 = fma (a, PD[15], PD[14]);
double r9 = fma (a, PD[17], PD[16]);
double a2 = a * a;
double r = r9;
r = fma (a2, r, r8);
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x40100000)
{ /* 3.25 <= |x| < 4.0. */
/* Same scheme with the erfc_poly_E coefficients; a = |x| - 3.25. */
double a = fabs (x);
a = a - 3.25;
double r1 = fma (a, PE[1], PE[0]);
double r2 = fma (a, PE[3], PE[2]);
double r3 = fma (a, PE[5], PE[4]);
double r4 = fma (a, PE[7], PE[6]);
double r5 = fma (a, PE[9], PE[8]);
double r6 = fma (a, PE[11], PE[10]);
double r7 = fma (a, PE[13], PE[12]);
double a2 = a * a;
double r = r7;
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else if (ia < 0x4017a000)
{ /* 4 <= |x| < 5.90625. */
/* Same scheme with the erfc_poly_F coefficients; a = |x| / 2 - 2.  The
coefficient count is odd, so the highest term PF[16] stands alone. */
double a = fabs (x);
a = fma (0.5, a, -2.0);
double r1 = fma (a, PF[1], PF[0]);
double r2 = fma (a, PF[3], PF[2]);
double r3 = fma (a, PF[5], PF[4]);
double r4 = fma (a, PF[7], PF[6]);
double r5 = fma (a, PF[9], PF[8]);
double r6 = fma (a, PF[11], PF[10]);
double r7 = fma (a, PF[13], PF[12]);
double r8 = fma (a, PF[15], PF[14]);
double r9 = PF[16];
double a2 = a * a;
double r = r9;
r = fma (a2, r, r8);
r = fma (a2, r, r7);
r = fma (a2, r, r6);
r = fma (a2, r, r5);
r = fma (a2, r, r4);
r = fma (a2, r, r3);
r = fma (a2, r, r2);
r = fma (a2, r, r1);
if (sign)
return -1.0 + r;
else
return 1.0 - r;
}
else
{
/* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */
/* 1.0 - (sign << 1) is +1 or -1; adding 1.0 / x contributes 0 for an
infinity and propagates a nan. */
if (unlikely (ia >= 0x7ff00000))
return (double) (1.0 - (sign << 1)) + 1.0 / x;
/* Finite |x| >= 5.90625: the result is returned as exactly +-1. */
if (sign)
return -1.0;
else
return 1.0;
}
}

View file

@ -0,0 +1,85 @@
/*
* Shared data between erf and erfc.
*
* Copyright (c) 2019-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
/*
Minimax approximation of erf

Coefficient tables shared between erf and erfc.  Several tables are only
filled in when the corresponding *_NCOEFFS macro has the expected value;
otherwise the initializer list for that member is empty.
*/
const struct erf_data __erf_data = {
/* Polynomial coefficients, 10 entries. */
.erf_poly_A = {
#if ERF_POLY_A_NCOEFFS == 10
0x1.06eba8214db68p-3, -0x1.812746b037948p-2, 0x1.ce2f21a03872p-4,
-0x1.b82ce30e6548p-6, 0x1.565bcc360a2f2p-8, -0x1.c02d812bc979ap-11,
0x1.f99bddfc1ebe9p-14, -0x1.f42c457cee912p-17, 0x1.b0e414ec20ee9p-20,
-0x1.18c47fd143c5ep-23
#endif
},
/* Rational approximation on [0x1p-28, 0.84375] */
/* Numerator coefficients, 5 entries. */
.erf_ratio_N_A = {
0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,
-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16
},
/* Denominator coefficients, 5 entries. */
.erf_ratio_D_A = {
0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,
0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18
},
/* Rational approximation on [0.84375, 1.25] */
/* Numerator coefficients, 7 entries. */
.erf_ratio_N_B = {
-0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2,
0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5,
-0x1.1bf380a96073fp-9
},
/* Denominator coefficients, 6 entries. */
.erf_ratio_D_B = {
0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4,
0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7
},
/* Polynomial for the interval [1.25, 2] (a and b in the comment below). */
.erfc_poly_C = {
#if ERFC_POLY_C_NCOEFFS == 16
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25 */
0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2,
-0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5,
-0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8,
-0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12,
0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16,
-0x1.578c9e375d37p-19
#endif
},
/* Polynomial for the interval [2, 3.25]. */
.erfc_poly_D = {
#if ERFC_POLY_D_NCOEFFS == 18
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 */
0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3,
-0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2,
-0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2,
0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3,
-0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3,
0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10
#endif
},
/* Polynomial for the interval [3.25, 4]. */
.erfc_poly_E = {
#if ERFC_POLY_E_NCOEFFS == 14
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 */
0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14,
-0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12,
0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14,
-0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20,
-0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23
#endif
},
/* Polynomial for the interval [4, 5.90625]. */
.erfc_poly_F = {
#if ERFC_POLY_F_NCOEFFS == 17
/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 */
0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19,
-0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14,
0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11,
-0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11,
0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14,
-0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19
#endif
}
};

View file

@ -0,0 +1,104 @@
/*
* Single-precision erf(x) function.
*
* Copyright (c) 2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include <math.h>
#include "math_config.h"
#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
#define A __erff_data.erff_poly_A
#define B __erff_data.erff_poly_B
/* Upper 12 bits of the bit pattern of X, as returned by asuint.  */
static inline uint32_t
top12 (float x)
{
  return (uint32_t) (asuint (x) >> 20);
}
/* Single-precision error function.

   Uses either a pure polynomial approximation (|x| < 0.875) or the
   exponential of a polynomial (0.875 <= |x| < 4.0); larger finite inputs
   give +-1 and inf/nan are handled separately.
   Worst-case error is 1.09ulps at 0x1.c111acp-1.  */
float
erff (float x)
{
  /* Raw bits, sign bit, and the top 12 bits with the sign masked off.  */
  uint32_t bits = asuint (x);
  uint32_t negative = bits >> 31;
  uint32_t abstop = top12 (x) & 0x7ff;

  /* Limit of both intervals is 0.875 for performance reasons but
     coefficients computed on [0.0, 0.921875] and [0.921875, 4.0], which
     brought accuracy from 0.94 to 1.1ulps.  */
  if (abstop < 0x3f6)
    {
      /* |x| < 0.875.  */
      float xsq, poly;

      /* Tiny and subnormal cases.  */
      if (unlikely (abstop < 0x318))
        {
          /* |x| < 2^(-28).  */
          if (unlikely (abstop < 0x040))
            {
              /* |x| < 2^(-119).  */
              float tiny = fmaf (TwoOverSqrtPiMinusOne, x, x);
              return check_uflowf (tiny);
            }
          return x + TwoOverSqrtPiMinusOne * x;
        }

      /* Normalized cases: Horner scheme for x + x * P(x^2), running from
         the highest coefficient A[5] down to A[0].  */
      xsq = x * x;
      poly = A[5];
      for (int i = 4; i >= 0; i--)
        poly = fmaf (poly, xsq, A[i]);
      return fmaf (poly, x, x);
    }

  if (abstop < 0x408)
    {
      /* |x| < 4.0 - custom Estrin scheme.  */
      float a = fabsf (x);
      float hi, lo, xsq, q, e;

      /* Estrin step on the high order (small magnitude) coefficients.  */
      hi = fmaf (B[6], a, B[5]);
      lo = fmaf (B[4], a, B[3]);
      xsq = x * x;
      q = fmaf (hi, xsq, lo);

      /* Pure Horner scheme for the remaining coefficients.  */
      q = fmaf (q, a, B[2]);
      q = fmaf (q, a, B[1]);
      q = fmaf (q, a, B[0]);
      q = fmaf (q, a, a);

      /* Single precision exponential with ~0.5ulps, ensures erff has
         max. rel. error < 1ulp on [0.921875, 4.0] and < 1.1ulps on
         [0.875, 4.0].  */
      e = expf (-q);

      /* Explicit copysign (calling copysignf increases latency).  */
      return negative ? -1.0f + e : 1.0f - e;
    }

  /* |x| >= 4.0.
     Special cases: erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1.  */
  if (unlikely (abstop >= 0x7f8))
    return (1.f - (float) ((bits >> 31) << 1)) + 1.f / x;

  /* Explicit copysign (calling copysignf increases latency).  */
  return negative ? -1.0f : 1.0f;
}

View file

@ -0,0 +1,22 @@
/*
* Data for approximation of erff.
*
* Copyright (c) 2019-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
/* Minimax approximation of erff.
Two single-precision coefficient tables: erff_poly_A has 6 entries and
erff_poly_B has 7 entries. */
const struct erff_data __erff_data = {
.erff_poly_A = {
0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
-0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f
},
.erff_poly_B = {
0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f,
-0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f,
0x1.222900p-16f
}
};

View file

@ -0,0 +1,176 @@
/*
* Double-precision e^x function.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
#define T __exp_data.tab
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflowed into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow.
In both cases the exponent of scale is first moved into the valid range
and the removed power of two is multiplied back at the end. */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
/* Bit 31 of ki is the sign of (int32_t) ki. */
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by <= 460. */
/* Take 2^1009 out of scale and multiply it back in the final product. */
sbits -= 1009ull << 52;
scale = asdouble (sbits);
y = 0x1p1009 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
/* Scale up by 2^1022 here; the final multiply by 0x1p-1022 undoes it. */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
/* lo collects the rounding errors of y and of hi = 1.0 + y. */
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}
/* Sign and exponent bits of X: the upper 12 bits of its bit pattern.  */
static inline uint32_t
top12 (double x)
{
  return (uint32_t) (asuint64 (x) >> 52);
}
/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
If hastail is 0 then xtail is assumed to be 0 too.
Outline: filter out tiny, huge and non-finite x, reduce x to k and r,
look up 2^(k/N) in the table T, evaluate a polynomial in r and combine
the two; results that may overflow or underflow go through specialcase. */
static inline double
exp_inline (double x, double xtail, int hastail)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
/* Exponent bits of x with the sign bit masked off. */
abstop = top12 (x) & 0x7ff;
/* One unsigned comparison covers both ends: the subtraction wraps around
when |x| < 2^-54, so the test is true for |x| < 2^-54 or |x| >= 512. */
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
/* A wrapped difference has its top bit set: |x| < 2^-54. */
if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12 (1024.0))
{
if (asuint64 (x) == asuint64 (-INFINITY))
return 0.0;
/* +inf or nan: 1.0 + x yields +inf or nan respectively. */
if (abstop >= top12 (INFINITY))
return 1.0 + x;
/* Finite with |x| >= 1024: the sign bit selects underflow or overflow. */
if (asuint64 (x) >> 63)
return __math_uflow (0);
else
return __math_oflow (0);
}
/* Large x is special cased below. */
/* abstop == 0 is the marker tested before the final scaling. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
/* Three ways to obtain k as a double (kd) and as integer bits (ki). */
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd) >> 16;
kd = (double_t) (int32_t) ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd);
kd -= Shift;
#endif
/* -ln2/N is applied as a high and a low part (NegLn2hiN, NegLn2loN). */
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
if (hastail)
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
/* T stores two words per entry: T[idx] is the bit pattern of tail and
T[idx + 1] is added to the shifted k to form the bits of scale. */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
/* abstop was cleared above for 512 <= |x| < 1024. */
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}
/* Double-precision e^x: exp_inline called with a zero xtail and hastail
set to 0. */
double
exp (double x)
{
return exp_inline (x, 0, 0);
}
/* May be useful for implementing pow where more than double
precision input is needed.
Computes exp(x + xtail) by calling exp_inline with hastail set to 1; the
bounds on xtail documented at exp_inline apply. */
double
__exp_dd (double x, double xtail)
{
return exp_inline (x, xtail, 1);
}
/* Extra symbols that are only emitted when building with USE_GLIBC_ABI.
NOTE(review): strong_alias and hidden_alias are defined outside this file;
presumably they export the first name under the second - confirm. */
#if USE_GLIBC_ABI
strong_alias (exp, __exp_finite)
hidden_alias (exp, __ieee754_exp)
hidden_alias (__exp_dd, __exp1)
/* When long double has a 53-bit mantissa, expl simply forwards to exp. */
# if LDBL_MANT_DIG == 53
long double expl (long double x) { return exp (x); }
# endif
#endif

View file

@ -0,0 +1,143 @@
/*
* Double-precision 2^x function.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"
#define N (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift
#define T __exp_data.tab
#define C1 __exp_data.exp2_poly[0]
#define C2 __exp_data.exp2_poly[1]
#define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4]
#define C6 __exp_data.exp2_poly[5]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflowed into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow.
In both cases the exponent of scale is first moved into the valid range
and the removed power of two is multiplied back at the end. */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
/* Bit 31 of ki is the sign of (int32_t) ki. */
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by 1. */
/* Halve scale and double the final result to compensate. */
sbits -= 1ull << 52;
scale = asdouble (sbits);
y = 2 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
/* Scale up by 2^1022 here; the final multiply by 0x1p-1022 undoes it. */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
/* lo collects the rounding errors of y and of hi = 1.0 + y. */
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}
/* Sign and exponent bits of X: the upper 12 bits of its bit pattern.  */
static inline uint32_t
top12 (double x)
{
  return (uint32_t) (asuint64 (x) >> 52);
}
/* Double-precision 2^x.
Outline: filter out tiny, huge and non-finite x, split x into k/N and r,
look up 2^(k/N) in the table T, evaluate a polynomial in r and combine
the two; results that may overflow or underflow go through specialcase. */
double
exp2 (double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, r, r2, scale, tail, tmp;
/* Exponent bits of x with the sign bit masked off. */
abstop = top12 (x) & 0x7ff;
/* One unsigned comparison covers both ends: the subtraction wraps around
when |x| < 2^-54, so the test is true for |x| < 2^-54 or |x| >= 512. */
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
/* A wrapped difference has its top bit set: |x| < 2^-54. */
if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12 (1024.0))
{
if (asuint64 (x) == asuint64 (-INFINITY))
return 0.0;
/* +inf or nan: 1.0 + x yields +inf or nan respectively. */
if (abstop >= top12 (INFINITY))
return 1.0 + x;
/* Finite with |x| >= 1024: positive x overflows. */
if (!(asuint64 (x) >> 63))
return __math_oflow (0);
/* For negative x the bit patterns grow with |x|, so this is x <= -1075;
values in (-1075, -1024] fall through to the computation below. */
else if (asuint64 (x) >= asuint64 (-1075.0))
return __math_uflow (0);
}
/* Doubling the bit pattern shifts out the sign bit: |x| > 928. */
if (2 * asuint64 (x) > 2 * asuint64 (928.0))
/* Large x is special cased below. */
abstop = 0;
}
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double (x + Shift);
ki = asuint64 (kd); /* k. */
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
/* T stores two words per entry: T[idx] is the bit pattern of tail and
T[idx + 1] is added to the shifted k to form the bits of scale. */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
#if EXP2_POLY_ORDER == 4
tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP2_POLY_ORDER == 5
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP2_POLY_ORDER == 6
tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
/* abstop was cleared above for large |x|. */
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}
/* Additional entry points, built only when USE_GLIBC_ABI is non-zero.
   NOTE(review): strong_alias and hidden_alias are macros defined outside
   this view; presumably they bind the second name to exp2 - confirm.  */
#if USE_GLIBC_ABI
strong_alias (exp2, __exp2_finite)
hidden_alias (exp2, __ieee754_exp2)
/* When long double has a 53-bit significand, exp2l just forwards to exp2.  */
# if LDBL_MANT_DIG == 53
long double exp2l (long double x) { return exp2 (x); }
# endif
#endif

View file

@ -0,0 +1,80 @@
/*
* Single-precision 2^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
Wrong count: 168353 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
/* Number of entries in the lookup table (indexed with ki % N below).  */
#define N (1 << EXP2F_TABLE_BITS)
/* Shorthands for the members of __exp2f_data used by exp2f.  */
#define T __exp2f_data.tab
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
/* Return the bits of X's representation above bit 19, i.e. its
   uppermost 12 bits when asuint yields a 32-bit value.  */
static inline uint32_t
top12 (float x)
{
  return (uint32_t) (asuint (x) >> 20);
}
/* Single-precision 2^x, evaluated in double_t arithmetic.

   Inputs with |x| >= 128 and NaN go through the special-case chain first;
   everything that is not returned there falls through to the table plus
   polynomial evaluation.  */
float
exp2f (float x)
{
  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
  double_t xwide, shifted, frac, frac2, head, acc, scale;
  uint64_t kbits, sbits;
  uint32_t exp_top;

  xwide = (double_t) x;
  exp_top = top12 (x) & 0x7ff;
  if (unlikely (exp_top >= top12 (128.0f)))
    {
      /* Here |x| >= 128 or x is nan.  */
      if (asuint (x) == asuint (-INFINITY))
        return 0.0f;
      if (exp_top >= top12 (INFINITY))
        return x + x;
      if (x > 0.0f)
        return __math_oflowf (0);
      if (x <= -150.0f)
        return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
      if (x < -149.0f)
        return __math_may_uflowf (0);
#endif
    }

  /* Decompose x = k/N + r, int k, r in [-1/(2N), 1/(2N)].  */
  shifted = eval_as_double (xwide + SHIFT);
  kbits = asuint64 (shifted);
  shifted -= SHIFT; /* Now k/N.  */
  frac = xwide - shifted;

  /* 2^(k/N): table entry with k folded into the exponent field.  */
  sbits = T[kbits % N] + (kbits << (52 - EXP2F_TABLE_BITS));
  scale = asdouble (sbits);

  /* 2^r ~= C0*r^3 + C1*r^2 + C2*r + 1, then multiply by the scale.  */
  head = C[0] * frac + C[1];
  frac2 = frac * frac;
  acc = C[2] * frac + 1;
  acc = head * frac2 + acc;
  acc = acc * scale;
  return eval_as_float (acc);
}
/* Additional entry points, built only when USE_GLIBC_ABI is non-zero.
   NOTE(review): strong_alias and hidden_alias are macros defined outside
   this view; presumably they bind the second name to exp2f - confirm.  */
#if USE_GLIBC_ABI
strong_alias (exp2f, __exp2f_finite)
hidden_alias (exp2f, __ieee754_exp2f)
#endif

View file

@ -0,0 +1,78 @@
/*
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
/* Table size; selects which of the variants below is compiled in.  */
#define N (1 << EXP2F_TABLE_BITS)
/* Lookup table and polynomial coefficients for the single-precision
   exponential functions.  Exactly one N-specific variant of each array is
   selected by the preprocessor.  */
const struct exp2f_data __exp2f_data = {
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */
.tab = {
#if N == 8
0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
#elif N == 16
0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
#elif N == 32
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
#elif N == 64
0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
#endif
},
/* The rounding shift constant 0x1.8p+52 divided by N.  */
.shift_scaled = 0x1.8p+52 / N,
/* Cubic, quadratic and linear polynomial coefficients, in that order.  */
.poly = {
#if N == 8
0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
#elif N == 16
0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
#elif N == 32
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
#elif N == 64
0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
#endif
},
/* Unscaled rounding shift constant.  */
.shift = 0x1.8p+52,
/* N times 0x1.71547652b82fep+0 (presumably 1/ln2, going by the field
   name - confirm).  */
.invln2_scaled = 0x1.71547652b82fep+0 * N,
/* The same coefficients as .poly, divided by N^3, N^2 and N respectively.  */
.poly_scaled = {
#if N == 8
0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
#elif N == 16
0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
#elif N == 32
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
#elif N == 64
0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
#endif
},
};

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
/*
* Single-precision e^x function.
*
* Copyright (c) 2017-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
Wrong count: 170635 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
/* Number of entries in the lookup table (indexed with ki % N below).  */
#define N (1 << EXP2F_TABLE_BITS)
/* Shorthands for the members of __exp2f_data used by expf: the factor that
   turns x into x*N/Ln2, the table, and the polynomial coefficients.  */
#define InvLn2N __exp2f_data.invln2_scaled
#define T __exp2f_data.tab
#define C __exp2f_data.poly_scaled
/* Return the bits of X's representation above bit 19, i.e. its
   uppermost 12 bits when asuint yields a 32-bit value.  */
static inline uint32_t
top12 (float x)
{
  return (uint32_t) (asuint (x) >> 20);
}
/* Single-precision e^x, evaluated in double_t arithmetic.

   Inputs with |x| >= 88 and NaN go through the special-case chain first;
   everything that is not returned there falls through to the table plus
   polynomial evaluation.  */
float
expf (float x)
{
  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
  double_t xwide, scaled, rounded, frac, frac2, head, acc, scale;
  uint64_t kbits, sbits;
  uint32_t exp_top;

  xwide = (double_t) x;
  exp_top = top12 (x) & 0x7ff;
  if (unlikely (exp_top >= top12 (88.0f)))
    {
      /* Here |x| >= 88 or x is nan.  */
      if (asuint (x) == asuint (-INFINITY))
        return 0.0f;
      if (exp_top >= top12 (INFINITY))
        return x + x;
      if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
        return __math_oflowf (0);
      if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
        return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
      if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
        return __math_may_uflowf (0);
#endif
    }

  /* Decompose x*N/Ln2 = k + r, int k, r in [-1/2, 1/2].  */
  scaled = InvLn2N * xwide;

  /* k is the rounded value of the scaled argument and lies in
     [-150*N, 128*N].  Round-to-nearest is preferred: any other rounding
     lets |r| grow, which increases the approximation error.  */
#if TOINT_INTRINSICS
  rounded = roundtoint (scaled);
  kbits = converttoint (scaled);
#else
# define SHIFT __exp2f_data.shift
  rounded = eval_as_double (scaled + SHIFT);
  kbits = asuint64 (rounded);
  rounded -= SHIFT;
#endif
  frac = scaled - rounded;

  /* 2^(k/N): table entry with k folded into the exponent field.  */
  sbits = T[kbits % N] + (kbits << (52 - EXP2F_TABLE_BITS));
  scale = asdouble (sbits);

  /* exp(x) = 2^(k/N) * 2^(r/N) ~= scale * (C0*r^3 + C1*r^2 + C2*r + 1).  */
  head = C[0] * frac + C[1];
  frac2 = frac * frac;
  acc = C[2] * frac + 1;
  acc = head * frac2 + acc;
  acc = acc * scale;
  return eval_as_float (acc);
}
/* Additional entry points, built only when USE_GLIBC_ABI is non-zero.
   NOTE(review): strong_alias and hidden_alias are macros defined outside
   this view; presumably they bind the second name to expf - confirm.  */
#if USE_GLIBC_ABI
strong_alias (expf, __expf_finite)
hidden_alias (expf, __ieee754_expf)
#endif

View file

@ -0,0 +1,100 @@
/*
* Public API.
*
* Copyright (c) 2015-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Include guard.  */
#ifndef _MATHLIB_H
#define _MATHLIB_H
/* Scalar single-precision functions.  */
float expf (float);
float exp2f (float);
float logf (float);
float log2f (float);
float powf (float, float);
float sinf (float);
float cosf (float);
void sincosf (float, float*, float*);
/* Scalar double-precision functions.  */
double exp (double);
double exp2 (double);
double log (double);
double log2 (double);
double pow (double, double);
/* Scalar functions using the vector algorithm with identical result. */
float __s_sinf (float);
float __s_cosf (float);
float __s_expf (float);
float __s_expf_1u (float);
float __s_exp2f (float);
float __s_exp2f_1u (float);
float __s_logf (float);
float __s_powf (float, float);
double __s_sin (double);
double __s_cos (double);
double __s_exp (double);
double __s_log (double);
double __s_pow (double, double);
/* Everything below is declared for AArch64 targets only.  */
#if __aarch64__
/* Vector types: __f32x4_t holds 4 floats, __f64x2_t holds 2 doubles.
   GCC >= 5 uses its builtin types, clang >= 3.5 the neon_vector_type
   attribute; any other compiler is rejected.  */
#if __GNUC__ >= 5
typedef __Float32x4_t __f32x4_t;
typedef __Float64x2_t __f64x2_t;
#elif __clang_major__*100+__clang_minor__ >= 305
typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
#else
#error Unsupported compiler
#endif
/* Vector functions following the base PCS. */
__f32x4_t __v_sinf (__f32x4_t);
__f32x4_t __v_cosf (__f32x4_t);
__f32x4_t __v_expf (__f32x4_t);
__f32x4_t __v_expf_1u (__f32x4_t);
__f32x4_t __v_exp2f (__f32x4_t);
__f32x4_t __v_exp2f_1u (__f32x4_t);
__f32x4_t __v_logf (__f32x4_t);
__f32x4_t __v_powf (__f32x4_t, __f32x4_t);
__f64x2_t __v_sin (__f64x2_t);
__f64x2_t __v_cos (__f64x2_t);
__f64x2_t __v_exp (__f64x2_t);
__f64x2_t __v_log (__f64x2_t);
__f64x2_t __v_pow (__f64x2_t, __f64x2_t);
/* The vector-PCS variants need the aarch64_vector_pcs attribute, so they
   are only declared for GCC >= 9 or clang >= 8.  */
#if __GNUC__ >= 9 || __clang_major__ >= 8
#define __vpcs __attribute__((__aarch64_vector_pcs__))
/* Vector functions following the vector PCS. */
__vpcs __f32x4_t __vn_sinf (__f32x4_t);
__vpcs __f32x4_t __vn_cosf (__f32x4_t);
__vpcs __f32x4_t __vn_expf (__f32x4_t);
__vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t);
__vpcs __f32x4_t __vn_logf (__f32x4_t);
__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t __vn_sin (__f64x2_t);
__vpcs __f64x2_t __vn_cos (__f64x2_t);
__vpcs __f64x2_t __vn_exp (__f64x2_t);
__vpcs __f64x2_t __vn_log (__f64x2_t);
__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);
/* Vector functions following the vector PCS using ABI names. */
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
/* Closes the vector-PCS compiler check.  */
#endif
/* Closes #if __aarch64__.  */
#endif
/* Closes the include guard.  */
#endif

View file

@ -0,0 +1,162 @@
/*
* Double-precision log(x) function.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/* Shorthands for the members of __log_data used by log.  */
#define T __log_data.tab
#define T2 __log_data.tab2
#define B __log_data.poly1
#define A __log_data.poly
#define Ln2hi __log_data.ln2hi
#define Ln2lo __log_data.ln2lo
/* Number of subintervals; table indices are reduced modulo N.  */
#define N (1 << LOG_TABLE_BITS)
/* Bit pattern of the double 0x1.6p-1.  It is subtracted from the input bits
   so that the reduced argument z lands in [OFF, 2*OFF).  */
#define OFF 0x3fe6000000000000
/* Return the uppermost 16 bits of the 64-bit representation of X.  */
static inline uint32_t
top16 (double x)
{
  return (uint32_t) (asuint64 (x) >> 48);
}
/* Double-precision log(x).

   Three stages:
   1. Inputs whose bit pattern lies in [LO, HI), i.e. close to 1.0, are
      evaluated with the dedicated polynomial B in r = x - 1.
   2. Zero, negative, infinite, NaN and subnormal inputs are handled:
        +-0           -> __math_divzero (1)
        +inf          -> x
        x < 0 or NaN  -> __math_invalid (x)
        subnormal     -> scaled by 2^52 and the exponent corrected.
   3. Otherwise x = 2^k z is reduced with the table T and
      log(x) = log1p(z/c - 1) + log(c) + k*Ln2 is evaluated.  */
double
log (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64 (x);
top = top16 (x);
/* Bit patterns bounding the "close to 1.0" interval; the bounds depend on
   the order of the polynomial B.  */
#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1p-5)
# define HI asuint64 (1.0 + 0x1.1p-5)
#elif LOG_POLY1_ORDER == 12
# define LO asuint64 (1.0 - 0x1p-4)
# define HI asuint64 (1.0 + 0x1.09p-4)
#endif
/* Unsigned wrap-around makes this a single check for LO <= ix < HI.  */
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
#if LOG_POLY1_ORDER == 10
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
w = B[0] * r2; /* B[0] == -0.5. */
/* hi is the rounded sum r + w; (r - hi + w) is added to y to compensate
   for the rounding.  */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 11
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2]
+ r2 * (B[3] + r * B[4] + r2 * B[5]
+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 12
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6]
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
# if N <= 64
/* Worst-case error is around 0.532 ULP. */
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
# else
/* Worst-case error is around 0.507 ULP. */
/* Split r into rhi + rlo by adding and subtracting r * 2^27.  */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
# endif
#endif
return eval_as_double (y);
}
/* Unsigned wrap-around: true when top < 0x0010 or top >= 0x7ff0.  */
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
/* Doubling discards the sign bit, so this matches both +0 and -0.  */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
/* Sign bit set, or exponent field all ones.  */
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
/* Subtract the top 12 bits of tmp (k in the exponent position) to get z.  */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t) k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
#if LOG_POLY_ORDER == 6
y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
#elif LOG_POLY_ORDER == 7
y = lo
+ r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+ r2 * r2 * (A[4] + r * A[5]))
+ hi;
#endif
return eval_as_double (y);
}
/* Additional entry points, built only when USE_GLIBC_ABI is non-zero.
   NOTE(review): strong_alias and hidden_alias are macros defined outside
   this view; presumably they bind the second name to log - confirm.  */
#if USE_GLIBC_ABI
strong_alias (log, __log_finite)
hidden_alias (log, __ieee754_log)
/* When long double has a 53-bit significand, logl just forwards to log.  */
# if LDBL_MANT_DIG == 53
long double logl (long double x) { return log (x); }
# endif
#endif

View file

@ -0,0 +1,141 @@
/*
* Double-precision log2(x) function.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/* Shorthands for the members of __log2_data used by log2.  */
#define T __log2_data.tab
#define T2 __log2_data.tab2
#define B __log2_data.poly1
#define A __log2_data.poly
#define InvLn2hi __log2_data.invln2hi
#define InvLn2lo __log2_data.invln2lo
/* Number of subintervals; table indices are reduced modulo N.  */
#define N (1 << LOG2_TABLE_BITS)
/* Bit pattern of the double 0x1.6p-1.  It is subtracted from the input bits
   so that the reduced argument z lands in [OFF, 2*OFF).  */
#define OFF 0x3fe6000000000000
/* Return the uppermost 16 bits of the 64-bit representation of X.  */
static inline uint32_t
top16 (double x)
{
  return (uint32_t) (asuint64 (x) >> 48);
}
/* Double-precision log2(x).

   Three stages:
   1. Inputs whose bit pattern lies in [LO, HI), i.e. close to 1.0, are
      evaluated with the dedicated polynomial B in r = x - 1.
   2. Zero, negative, infinite, NaN and subnormal inputs are handled:
        +-0           -> __math_divzero (1)
        +inf          -> x
        x < 0 or NaN  -> __math_invalid (x)
        subnormal     -> scaled by 2^52 and the exponent corrected.
   3. Otherwise x = 2^k z is reduced with the table T and
      log2(x) = log2(z/c) + log2(c) + k is evaluated.  */
double
log2 (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64 (x);
top = top16 (x);
/* Bit patterns bounding the "close to 1.0" interval.  */
#if LOG2_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1.5b51p-5)
# define HI asuint64 (1.0 + 0x1.6ab2p-5)
#endif
/* Unsigned wrap-around makes this a single check for LO <= ix < HI.  */
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
/* hi + lo ~= r * (InvLn2hi + InvLn2lo).  */
#if HAVE_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
/* rhi keeps only the upper 32 bits of r's representation.  */
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
#if LOG2_POLY1_ORDER == 11
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
y = hi + p;
/* (hi - y + p) compensates for the rounding of hi + p.  */
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
#endif
return eval_as_double (y);
}
/* Unsigned wrap-around: true when top < 0x0010 or top >= 0x7ff0.  */
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
/* Doubling discards the sign bit, so this matches both +0 and -0.  */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
/* Sign bit set, or exponent field all ones.  */
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
/* Subtract the top 12 bits of tmp (k in the exponent position) to get z.  */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
kd = (double_t) k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
/* rhi keeps only the upper 32 bits of r's representation.  */
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif
/* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc;
hi = t3 + t1;
lo = t3 - hi + t1 + t2;
/* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2;
#if LOG2_POLY_ORDER == 7
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
#endif
return eval_as_double (y);
}
/* Additional entry points, built only when USE_GLIBC_ABI is non-zero.
   NOTE(review): strong_alias and hidden_alias are macros defined outside
   this view; presumably they bind the second name to log2 - confirm.  */
#if USE_GLIBC_ABI
strong_alias (log2, __log2_finite)
hidden_alias (log2, __ieee754_log2)
/* When long double has a 53-bit significand, log2l just forwards to log2.  */
# if LDBL_MANT_DIG == 53
long double log2l (long double x) { return log2 (x); }
# endif
#endif

View file

@ -0,0 +1,209 @@
/*
* Data for log2.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
/* Number of subintervals; selects which table variant is compiled in.  */
#define N (1 << LOG2_TABLE_BITS)
/* Constants, polynomial coefficients and lookup tables for log2.  */
const struct log2_data __log2_data = {
// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
// It is stored split into a high part and a low part.
.invln2hi = 0x1.7154765200000p+0,
.invln2lo = 0x1.705fc2eefa200p-33,
// Coefficients of the polynomial used for inputs close to 1.0.
.poly1 = {
#if LOG2_POLY1_ORDER == 11
// relative error: 0x1.2fad8188p-63
// in -0x1.5b51p-5 0x1.6ab2p-5
-0x1.71547652b82fep-1,
0x1.ec709dc3a03f7p-2,
-0x1.71547652b7c3fp-2,
0x1.2776c50f05be4p-2,
-0x1.ec709dd768fe5p-3,
0x1.a61761ec4e736p-3,
-0x1.7153fbc64a79bp-3,
0x1.484d154f01b4ap-3,
-0x1.289e4a72c383cp-3,
0x1.0b32f285aee66p-3,
#endif
},
// Coefficients of the polynomial used after table-based reduction.
.poly = {
#if N == 64 && LOG2_POLY_ORDER == 7
// relative error: 0x1.a72c2bf8p-58
// abs error: 0x1.67a552c8p-66
// in -0x1.f45p-8 0x1.f45p-8
-0x1.71547652b8339p-1,
0x1.ec709dc3a04bep-2,
-0x1.7154764702ffbp-2,
0x1.2776c50034c48p-2,
-0x1.ec7b328ea92bcp-3,
0x1.a6225e117f92ep-3,
#endif
},
/* Algorithm:
x = 2^k z
log2(x) = k + log2(c) + log2(z/c)
log2(z/c) = poly(z/c - 1)
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = (double)log2(c)
tab2[i].chi = (double)c
tab2[i].clo = (double)(c - (double)c)
where c is near the center of the subinterval and is chosen by trying +-2^29
floating point invc candidates around 1/center and selecting one for which
1) the rounding error in 0x1.8p10 + logc is 0,
2) the rounding error in z - chi - clo is < 0x1p-64 and
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
Note: 1) ensures that k + logc can be computed without rounding error, 2)
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
// Each entry is {invc, logc}.
.tab = {
#if N == 64
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
#endif
},
// tab2 is only built when there is no fast fma; each entry is {chi, clo}.
#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
# endif
},
#endif /* !HAVE_FAST_FMA */
};

View file

@ -0,0 +1,80 @@
/*
* Single-precision log2 function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/*
LOG2F_TABLE_BITS = 4
LOG2F_POLY_ORDER = 4
ULP error: 0.752 (nearest rounding.)
Relative error: 1.9 * 2^-26 (before rounding.)
*/
/* Number of subintervals; table indices are reduced modulo N.  */
#define N (1 << LOG2F_TABLE_BITS)
/* Shorthands for the members of __log2f_data used by log2f.  */
#define T __log2f_data.tab
#define A __log2f_data.poly
/* Offset subtracted from the input bits before the exponent and the table
   index are extracted; z then lies in [OFF, 2*OFF].  */
#define OFF 0x3f330000
/* Single-precision log2(x), evaluated in double_t arithmetic.

   Special inputs:
     x == 1 (WANT_ROUNDING only) -> 0
     +-0                         -> __math_divzerof (1)
     +inf                        -> x
     x < 0 or NaN                -> __math_invalidf (x)
     subnormal                   -> scaled by 2^23 and the exponent corrected
   All remaining inputs are reduced with the table T and a polynomial.  */
float
log2f (float x)
{
  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
  double_t zwide, frac, frac2, inv_c, log2_c, base, lin, acc;
  uint32_t bits, zbits, shifted;
  int expo, idx;

  bits = asuint (x);
#if WANT_ROUNDING
  /* Fix sign of zero with downward rounding when x==1.  */
  if (unlikely (bits == 0x3f800000))
    return 0;
#endif
  if (unlikely (bits - 0x00800000 >= 0x7f800000 - 0x00800000))
    {
      /* Here x < 0x1p-126 or x is inf or nan.  */
      if (bits * 2 == 0)
        return __math_divzerof (1);
      if (bits == 0x7f800000) /* log2(inf) == inf.  */
        return x;
      if ((bits & 0x80000000) || bits * 2 >= 0xff000000)
        return __math_invalidf (x);
      /* Subnormal input: normalize it.  */
      bits = asuint (x * 0x1p23f);
      bits -= 23 << 23;
    }

  /* Write x = 2^k z with z exact and in [OFF,2*OFF].  That range is cut
     into N subintervals; idx selects the one holding z, and c is a
     point near its center.  */
  shifted = bits - OFF;
  idx = (shifted >> (23 - LOG2F_TABLE_BITS)) % N;
  zbits = bits - (shifted & 0xff800000);
  expo = (int32_t) shifted >> 23; /* arithmetic shift */
  inv_c = T[idx].invc;
  log2_c = T[idx].logc;
  zwide = (double_t) asfloat (zbits);

  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
  frac = zwide * inv_c - 1;
  base = log2_c + (double_t) expo;

  /* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
  frac2 = frac * frac;
  acc = A[1] * frac + A[2];
  acc = A[0] * frac2 + acc;
  lin = A[3] * frac + base;
  acc = acc * frac2 + lin;
  return eval_as_float (acc);
}
/* Additional entry points, built only when USE_GLIBC_ABI is non-zero.
   NOTE(review): strong_alias and hidden_alias are macros defined outside
   this view; presumably they bind the second name to log2f - confirm.  */
#if USE_GLIBC_ABI
strong_alias (log2f, __log2f_finite)
hidden_alias (log2f, __ieee754_log2f)
#endif

View file

@ -0,0 +1,33 @@
/*
* Data definition for log2f.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
/* Lookup table and polynomial coefficients for log2f.  */
const struct log2f_data __log2f_data = {
/* 16 entries of two values each.
   NOTE(review): the member order is presumably {invc, logc}; confirm
   against the struct declaration.  */
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
},
/* Four polynomial coefficients.  */
.poly = {
-0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
0x1.715475f35c8b8p0,
}
};

View file

@ -0,0 +1,511 @@
/*
* Data for log.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
#define N (1 << LOG_TABLE_BITS)
const struct log_data __log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly1 = {
#if LOG_POLY1_ORDER == 10
// relative error: 0x1.32eccc6p-62
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.55555555554e5p-2,
-0x1.0000000000af2p-2,
0x1.9999999bbe436p-3,
-0x1.55555537f9cdep-3,
0x1.24922fc8127cfp-3,
-0x1.0000b7d6bb612p-3,
0x1.c806ee1ddbcafp-4,
-0x1.972335a9c2d6ep-4,
#elif LOG_POLY1_ORDER == 11
// relative error: 0x1.52c8b708p-68
// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
-0x1p-1,
0x1.5555555555555p-2,
-0x1.ffffffffffea9p-3,
0x1.999999999c4d4p-3,
-0x1.55555557f5541p-3,
0x1.249248fbe33e4p-3,
-0x1.ffffc9a3c825bp-4,
0x1.c71e1f204435dp-4,
-0x1.9a7f26377d06ep-4,
0x1.71c30cf8f7364p-4,
#elif LOG_POLY1_ORDER == 12
// relative error: 0x1.c04d76cp-63
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
-0x1p-1,
0x1.5555555555577p-2,
-0x1.ffffffffffdcbp-3,
0x1.999999995dd0cp-3,
-0x1.55555556745a7p-3,
0x1.24924a344de3p-3,
-0x1.fffffa4423d65p-4,
0x1.c7184282ad6cap-4,
-0x1.999eb43b068ffp-4,
0x1.78182f7afd085p-4,
-0x1.5521375d145cdp-4,
#endif
},
.poly = {
#if N == 64 && LOG_POLY_ORDER == 7
// relative error: 0x1.906eb8ap-58
// abs error: 0x1.d2cad5a8p-67
// in -0x1.fp-8 0x1.fp-8
-0x1.0000000000027p-1,
0x1.555555555556ap-2,
-0x1.fffffff0440bap-3,
0x1.99999991906c3p-3,
-0x1.555c8d7e8201ep-3,
0x1.24978c59151fap-3,
#elif N == 128 && LOG_POLY_ORDER == 6
// relative error: 0x1.926199e8p-56
// abs error: 0x1.882ff33p-65
// in -0x1.fp-9 0x1.fp-9
-0x1.0000000000001p-1,
0x1.555555551305bp-2,
-0x1.fffffffeb459p-3,
0x1.999b324f10111p-3,
-0x1.55575e506c89fp-3,
#elif N == 128 && LOG_POLY_ORDER == 7
// relative error: 0x1.649fc4bp-64
// abs error: 0x1.c3b5769p-74
// in -0x1.fp-9 0x1.fp-9
-0x1.0000000000001p-1,
0x1.5555555555556p-2,
-0x1.fffffffea1a8p-3,
0x1.99999998e9139p-3,
-0x1.555776801b968p-3,
0x1.2493c29331a5cp-3,
#endif
},
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + log(z/c)
log(z/c) = poly(z/c - 1)
where z is in [0x1.6p-1; 0x1.6p0] which is split into N subintervals and z falls
into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = (double)log(c)
tab2[i].chi = (double)c
tab2[i].clo = (double)(c - (double)c)
where c is near the center of the subinterval and is chosen by trying +-2^29
floating point invc candidates around 1/center and selecting one for which
1) the rounding error in 0x1.8p9 + logc is 0,
2) the rounding error in z - chi - clo is < 0x1p-66 and
3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
#if N == 64
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
#elif N == 128
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
{0x1.008040614b195p+0, -0x1.0040979240000p-9},
{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
#endif
},
#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
# elif N == 128
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
#endif
},
#endif /* !HAVE_FAST_FMA */
};

View file

@ -0,0 +1,79 @@
/*
* Single-precision log function.
*
* Copyright (c) 2017-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/*
LOGF_TABLE_BITS = 4
LOGF_POLY_ORDER = 4
ULP error: 0.818 (nearest rounding.)
Relative error: 1.957 * 2^-26 (before rounding.)
*/
#define T __logf_data.tab
#define A __logf_data.poly
#define Ln2 __logf_data.ln2
#define N (1 << LOGF_TABLE_BITS)
#define OFF 0x3f330000
/* Single-precision natural logarithm.

   Special cases, in the order they are tested below:
     x == 1          -> 0 (tested up front only when WANT_ROUNDING is set)
     x == +0 or -0   -> __math_divzerof (1)
     x == +inf       -> x
     x < 0 or NaN    -> __math_invalidf (x)
     x subnormal     -> scaled by 2^23, then handled like a normal input.

   Main path: x is written as 2^k * z, the table entry (invc, logc) for the
   subinterval containing z is looked up, and the result is
     k*Ln2 + logc + r + r^2 * (A[2] + A[1]*r + A[0]*r^2),  r = z*invc - 1.  */
float
logf (float x)
{
  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
  double_t z, r, r2, y, y0, invc, logc;
  uint32_t ix, iz, tmp;
  int k, i;

  ix = asuint (x);
#if WANT_ROUNDING
  /* Fix sign of zero with downward rounding when x==1.  */
  if (unlikely (ix == 0x3f800000))
    return 0;
#endif
  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
    {
      /* x < 0x1p-126 or inf or nan.  */
      if (ix * 2 == 0)
        return __math_divzerof (1);
      if (ix == 0x7f800000) /* log(inf) == inf.  */
        return x;
      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
        return __math_invalidf (x);
      /* x is subnormal, normalize it.  */
      ix = asuint (x * 0x1p23f);
      ix -= 23 << 23;
    }

  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
     The range is split into N subintervals.
     The ith subinterval contains z and c is near its center.  */
  tmp = ix - OFF;
  i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
  k = (int32_t) tmp >> 23; /* arithmetic shift */
  /* Mask of the sign and exponent bits (0xff800000).  The constant is
     unsigned because 0x1ff << 23 does not fit in a 32-bit int, which would
     make the signed shift undefined behaviour.  */
  iz = ix - (tmp & (0x1ffu << 23));
  invc = T[i].invc;
  logc = T[i].logc;
  z = (double_t) asfloat (iz);

  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
  r = z * invc - 1;
  y0 = logc + (double_t) k * Ln2;

  /* Pipelined polynomial evaluation to approximate log1p(r).  */
  r2 = r * r;
  y = A[1] * r + A[2];
  y = A[0] * r2 + y;
  y = y * r2 + (y0 + r);
  return eval_as_float (y);
}
/* When the glibc ABI is enabled, logf is additionally made available under
   two more names through the alias macros of math_config.h:
   __logf_finite (strong_alias) and __ieee754_logf (hidden_alias, i.e. an
   alias with hidden visibility).  */
#if USE_GLIBC_ABI
strong_alias (logf, __logf_finite)
hidden_alias (logf, __ieee754_logf)
#endif

View file

@ -0,0 +1,33 @@
/*
* Data definition for logf.
*
* Copyright (c) 2017-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
/* Lookup table and coefficients read by logf.  */
const struct logf_data __logf_data = {
/* tab[i] = {invc, logc}.  logf computes r = z*invc - 1 and adds logc,
   following log(x) = log1p(z/c-1) + log(c) + k*Ln2, so invc stands for 1/c
   and logc for log(c).  One entry per subinterval, 16 in total.  */
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
{ 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
{ 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
{ 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
{ 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
{ 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
{ 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
/* Entry with invc exactly 1 and logc exactly 0 (c == 1).  */
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
{ 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
{ 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
{ 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
{ 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
},
/* Multiplied by the exponent k in logf (the k*Ln2 term).  */
.ln2 = 0x1.62e42fefa39efp-1,
/* poly[0], poly[1], poly[2] in this order.  logf evaluates
   r + r^2 * (poly[2] + poly[1]*r + poly[0]*r^2), so they are the
   coefficients of r^4, r^3 and r^2 respectively.  */
.poly = {
-0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
}
};

View file

@ -0,0 +1,462 @@
/*
* Configuration for math routines.
*
* Copyright (c) 2017-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _MATH_CONFIG_H
#define _MATH_CONFIG_H
#include <math.h>
#include <stdint.h>
/* Build-time configuration knobs.  Every knob below is wrapped in #ifndef,
   so a value defined before this point takes precedence over the default
   selected here.  */
#ifndef WANT_ROUNDING
/* If defined to 1, return correct results for special cases in non-nearest
rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
This may be set to 0 if there is no fenv support or if math functions only
get called in round to nearest mode. */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
/* If defined to 1, set errno in math functions according to ISO C. Many math
libraries do not set errno, so this is 0 by default. It may need to be
set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
# define WANT_ERRNO 0
#endif
#ifndef WANT_ERRNO_UFLOW
/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).
   Defaults to on only when both WANT_ROUNDING and WANT_ERRNO are on. */
# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
#endif
/* Compiler can inline round as a single instruction.
   Defaults to 1 on AArch64 and to 0 elsewhere. */
#ifndef HAVE_FAST_ROUND
# if __aarch64__
# define HAVE_FAST_ROUND 1
# else
# define HAVE_FAST_ROUND 0
# endif
#endif
/* Compiler can inline lround, but not (long)round(x).
   Defaults to 1 only on AArch64 with a compiler reporting GNU C version 4.8
   or later (100*__GNUC__ + __GNUC_MINOR__ >= 408) and with
   __NO_MATH_ERRNO__ set. */
#ifndef HAVE_FAST_LROUND
# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
# define HAVE_FAST_LROUND 1
# else
# define HAVE_FAST_LROUND 0
# endif
#endif
/* Compiler can inline fma as a single instruction.
   Defaults to 1 when FP_FAST_FMA is defined or when targeting AArch64. */
#ifndef HAVE_FAST_FMA
# if defined FP_FAST_FMA || __aarch64__
# define HAVE_FAST_FMA 1
# else
# define HAVE_FAST_FMA 0
# endif
#endif
/* Provide *_finite symbols and some of the glibc hidden symbols
so libmathlib can be used with binaries compiled against glibc
to interpose math functions with both static and dynamic linking.
Defaults to 1 whenever __GNUC__ is non-zero. */
#ifndef USE_GLIBC_ABI
# if __GNUC__
# define USE_GLIBC_ABI 1
# else
# define USE_GLIBC_ABI 0
# endif
#endif
/* Optionally used extensions.

   HIDDEN, NOINLINE, UNUSED  attribute shorthands; they expand to nothing
                             when __GNUC__ is not defined.
   likely(x), unlikely(x)    branch-prediction hints for use in conditions.
   attribute_copy(f)         the copy attribute for __GNUC__ >= 9, otherwise
                             empty.
   strong_alias(f, a)        declare a as an alias of f.
   hidden_alias(f, a)        same, with hidden visibility.
   The alias macros are only defined for GNU-compatible compilers.  */
#ifdef __GNUC__
# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
# define NOINLINE __attribute__ ((noinline))
# define UNUSED __attribute__ ((unused))
/* __builtin_expect takes its operand as a long.  Both hints therefore
   reduce the condition to 0 or 1 with !! first, so that an operand wider
   than long (e.g. a uint64_t where long is 32 bits) is not truncated
   before it is tested.  */
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (!!(x), 0)
# if __GNUC__ >= 9
#  define attribute_copy(f) __attribute__ ((copy (f)))
# else
#  define attribute_copy(f)
# endif
# define strong_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
# define hidden_alias(f, a) \
  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
  attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
# define UNUSED
# define likely(x) (x)
# define unlikely(x) (x)
#endif
#if HAVE_FAST_ROUND
/* When set, the roundtoint and converttoint functions are provided with
the semantics documented below. */
# define TOINT_INTRINSICS 1
/* Round x to nearest int in all rounding modes, ties have to be rounded
consistently with converttoint so the results match. If the result
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
static inline double_t
roundtoint (double_t x)
{
return round (x);
}
/* Convert x to nearest int in all rounding modes, ties have to be rounded
consistently with roundtoint. If the result is not representible in an
int32_t then the semantics is unspecified. */
static inline int32_t
converttoint (double_t x)
{
# if HAVE_FAST_LROUND
return lround (x);
# else
return (long) round (x);
# endif
}
#endif
/* Return the object representation of the float f as a 32-bit unsigned
   integer.  The value is reinterpreted through a union, not converted.  */
static inline uint32_t
asuint (float f)
{
  union
  {
    float value;
    uint32_t bits;
  } pun;

  pun.value = f;
  return pun.bits;
}
/* Return the float whose object representation is the 32-bit pattern i.
   The value is reinterpreted through a union, not converted.  */
static inline float
asfloat (uint32_t i)
{
  union
  {
    uint32_t bits;
    float value;
  } pun;

  pun.bits = i;
  return pun.value;
}
/* Return the object representation of the double f as a 64-bit unsigned
   integer.  The value is reinterpreted through a union, not converted.  */
static inline uint64_t
asuint64 (double f)
{
  union
  {
    double value;
    uint64_t bits;
  } pun;

  pun.value = f;
  return pun.bits;
}
/* Return the double whose object representation is the 64-bit pattern i.
   The value is reinterpreted through a union, not converted.  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t bits;
    double value;
  } pun;

  pun.bits = i;
  return pun.value;
}
#ifndef IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif
/* Return non-zero if the float x is a signaling NaN, judged from its bit
   pattern only.

   With IEEE_754_2008_SNAN set, bit 22 (the top mantissa bit) is flipped and
   the doubled pattern, which drops the sign bit, is compared against
   0xff800000: the test holds when the exponent bits are all ones, bit 22 is
   clear and the remaining mantissa bits are not all zero.  Otherwise the
   test holds when the exponent bits are all ones and bit 22 is set.  */
static inline int
issignalingf_inline (float x)
{
  uint32_t bits = asuint (x);

  if (IEEE_754_2008_SNAN)
    return 2 * (bits ^ 0x00400000) > 2u * 0x7fc00000;
  return (bits & 0x7fc00000) == 0x7fc00000;
}
/* Return non-zero if the double x is a signaling NaN, judged from its bit
   pattern only.

   With IEEE_754_2008_SNAN set, bit 51 (the top mantissa bit) is flipped and
   the doubled pattern, which drops the sign bit, is compared against twice
   0x7ff8000000000000: the test holds when the exponent bits are all ones,
   bit 51 is clear and the remaining mantissa bits are not all zero.
   Otherwise the test holds when the exponent bits are all ones and bit 51
   is set.  */
static inline int
issignaling_inline (double x)
{
  uint64_t bits = asuint64 (x);

  if (IEEE_754_2008_SNAN)
    return 2 * (bits ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
  return (bits & 0x7ff8000000000000) == 0x7ff8000000000000;
}
/* Optimization barriers and forced-evaluation helpers.  Two implementations
   are provided: one built on an empty inline asm statement, selected when
   both __aarch64__ and __GNUC__ are set, and a fallback built on volatile
   objects.  */
#if __aarch64__ && __GNUC__
/* Prevent the optimization of a floating-point expression. */
/* The asm statement contains no instructions.  x is its only operand and is
   marked read-write ("+"), so the compiler has to produce x before the
   statement and cannot assume anything about its value after it.
   NOTE(review): "w" is taken to be GCC's AArch64 constraint for a
   floating-point/SIMD register - confirm in the GCC machine constraints. */
static inline float
opt_barrier_float (float x)
{
__asm__ __volatile__ ("" : "+w" (x));
return x;
}
/* Double-precision counterpart of opt_barrier_float. */
static inline double
opt_barrier_double (double x)
{
__asm__ __volatile__ ("" : "+w" (x));
return x;
}
/* Force the evaluation of a floating-point expression for its side-effect. */
/* Same empty asm statement as in opt_barrier_float; nothing is returned, so
   only the evaluation of the argument is kept. */
static inline void
force_eval_float (float x)
{
__asm__ __volatile__ ("" : "+w" (x));
}
/* Double-precision counterpart of force_eval_float. */
static inline void
force_eval_double (double x)
{
__asm__ __volatile__ ("" : "+w" (x));
}
#else
/* Fallback: x is stored to a volatile object and read back; the value read
   back is what gets returned. */
static inline float
opt_barrier_float (float x)
{
volatile float y = x;
return y;
}
/* Double-precision counterpart of the fallback opt_barrier_float. */
static inline double
opt_barrier_double (double x)
{
volatile double y = x;
return y;
}
/* Fallback: x is stored to a volatile object that is never read; UNUSED
   marks the otherwise unused variable. */
static inline void
force_eval_float (float x)
{
volatile float y UNUSED = x;
}
/* Double-precision counterpart of the fallback force_eval_float. */
static inline void
force_eval_double (double x)
{
volatile double y UNUSED = x;
}
#endif
/* Evaluate an expression as the specified type, normally a type
cast should be enough, but compilers implement non-standard
excess-precision handling, so when FLT_EVAL_METHOD != 0 then
these functions may need to be customized. */
/* Default version: the only conversion is the one to the float parameter
   type at the call; the parameter is returned unchanged. */
static inline float
eval_as_float (float x)
{
return x;
}
/* Default version: the only conversion is the one to the double parameter
   type at the call; the parameter is returned unchanged. */
static inline double
eval_as_double (double x)
{
return x;
}
/* Error handling tail calls for special cases, with a sign argument.
The sign of the return value is set if the argument is non-zero. */
/* The result overflows. */
HIDDEN float __math_oflowf (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN float __math_uflowf (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN float __math_may_uflowf (uint32_t);
/* Division by zero. */
HIDDEN float __math_divzerof (uint32_t);
/* The result overflows. */
HIDDEN double __math_oflow (uint32_t);
/* The result underflows to 0 in nearest rounding mode. */
HIDDEN double __math_uflow (uint32_t);
/* The result underflows to 0 in some directed rounding mode only. */
HIDDEN double __math_may_uflow (uint32_t);
/* Division by zero. */
HIDDEN double __math_divzero (uint32_t);
/* Error handling using input checking. */
/* Invalid input unless it is a quiet NaN. */
HIDDEN float __math_invalidf (float);
/* Invalid input unless it is a quiet NaN. */
HIDDEN double __math_invalid (double);
/* Error handling using output checking, only for errno setting. */
/* Check if the result overflowed to infinity. */
HIDDEN double __math_check_oflow (double);
/* Check if the result underflowed to 0. */
HIDDEN double __math_check_uflow (double);
/* Overflow check for a double result: hand x to __math_check_oflow when
   WANT_ERRNO is enabled, otherwise return x untouched.  */
static inline double
check_oflow (double x)
{
  if (WANT_ERRNO)
    return __math_check_oflow (x);
  return x;
}
/* Underflow check for a double result: hand x to __math_check_uflow when
   WANT_ERRNO is enabled, otherwise return x untouched.  */
static inline double
check_uflow (double x)
{
  if (WANT_ERRNO)
    return __math_check_uflow (x);
  return x;
}
/* Check if the result overflowed to infinity. */
HIDDEN float __math_check_oflowf (float);
/* Check if the result underflowed to 0. */
HIDDEN float __math_check_uflowf (float);
/* Overflow check for a float result: hand x to __math_check_oflowf when
   WANT_ERRNO is enabled, otherwise return x untouched.  */
static inline float
check_oflowf (float x)
{
  if (WANT_ERRNO)
    return __math_check_oflowf (x);
  return x;
}
/* Underflow check for a float result: hand x to __math_check_uflowf when
   WANT_ERRNO is enabled, otherwise return x untouched.  */
static inline float
check_uflowf (float x)
{
  if (WANT_ERRNO)
    return __math_check_uflowf (x);
  return x;
}
/* Shared between expf, exp2f and powf. */
/* tab has 1 << EXP2F_TABLE_BITS = 32 entries; poly and poly_scaled hold
   EXP2F_POLY_ORDER = 3 coefficients each.
   NOTE(review): the meaning of the individual fields is set by the
   expf/exp2f/powf implementations, which are not part of this header. */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern const struct exp2f_data
{
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data HIDDEN;
/* Table and coefficients used by logf.  The table has
   1 << LOGF_TABLE_BITS = 16 entries and poly has LOGF_POLY_ORDER - 1 = 3. */
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
extern const struct logf_data
{
/* Per-subinterval constants: logf computes r = z*invc - 1 and adds logc. */
struct
{
double invc, logc;
} tab[1 << LOGF_TABLE_BITS];
/* Multiplied by the exponent k in logf. */
double ln2;
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
} __logf_data HIDDEN;
/* Data for the single-precision log2 (judging by the name; the user of this
   table is not part of this header).  tab has 1 << LOG2F_TABLE_BITS = 16
   {invc, logc} entries and poly holds LOG2F_POLY_ORDER = 4 coefficients. */
#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
extern const struct log2f_data
{
struct
{
double invc, logc;
} tab[1 << LOG2F_TABLE_BITS];
double poly[LOG2F_POLY_ORDER];
} __log2f_data HIDDEN;
/* log2 data for powf (judging by the name).  tab has
   1 << POWF_LOG2_TABLE_BITS = 16 {invc, logc} entries and poly holds
   POWF_LOG2_POLY_ORDER = 5 coefficients. */
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
/* POWF_SCALE_BITS equals EXP2F_TABLE_BITS when TOINT_INTRINSICS is set and
   0 otherwise; POWF_SCALE is 2^POWF_SCALE_BITS as a double. */
#if TOINT_INTRINSICS
# define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
# define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
extern const struct powf_log2_data
{
struct
{
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data HIDDEN;
/* Configuration and data for exp and exp2 (judging by the field names; the
   users of this table are not part of this header).  tab has
   2 * (1 << EXP_TABLE_BITS) = 256 entries. */
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
/* Use polynomial that is optimized for a wider input range. This may be
needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
#define EXP_POLY_WIDE 0
/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
needed for good precision in non-nearest rounding and !EXP_POLY_WIDE. */
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
#define EXP2_POLY_WIDE 0
extern const struct exp_data
{
double invln2N;
double shift;
double negln2hiN;
double negln2loN;
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data HIDDEN;
/* Constants for the double-precision log.  With LOG_TABLE_BITS 7 the tables
   have 128 entries; poly holds LOG_POLY_ORDER - 1 = 5 coefficients and
   poly1 holds LOG_POLY1_ORDER - 1 = 11. */
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
extern const struct log_data
{
/* ln2 split into a high and a low part; the data file notes that
   k*ln2hi + logc is meant to be computable without rounding error. */
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* NOTE(review): presumably the polynomial for the range near x == 1 that
   the data file describes as special cased - confirm in the log code. */
double poly1[LOG_POLY1_ORDER - 1];
/* Per the data file: invc = 1/c and logc = (double)log(c), with c near the
   center of the subinterval. */
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
#if !HAVE_FAST_FMA
/* Per the data file: chi = (double)c and clo = (double)(c - (double)c),
   used to form z/c - 1 as (z - chi - clo)*invc when there is no fast fma. */
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data HIDDEN;
/* Table size (1 << LOG2_TABLE_BITS entries) and the sizes of the two
   polynomial coefficient arrays of the double-precision log2 data. */
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
/* Constant data for the double-precision base-2 log, declared here and
   defined elsewhere.  The second table (tab2) only exists when the target
   has no fast fma.
   NOTE(review): invln2hi/invln2lo are presumably a hi/lo split of 1/ln2 --
   confirm against the log2 implementation. */
extern const struct log2_data
{
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
#if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data HIDDEN;
/* Table size (1 << POW_LOG_TABLE_BITS entries) and polynomial order of the
   log step of the double-precision pow. */
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
/* Data for the log step of pow: ln2hi/ln2lo are the two parts of ln2 that
   get multiplied by the exponent k, poly holds the log1p polynomial
   coefficients, and tab[i] holds invc = 1/c, logc and logctail (the high
   and low parts of log(c)) for the ith subinterval. */
extern const struct pow_log_data
{
double ln2hi;
double ln2lo;
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* Note: the pad field is unused, but allows slightly faster indexing. */
struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data HIDDEN;
/* Two single-precision coefficient arrays, declared here and defined
   elsewhere.
   NOTE(review): presumably the polynomial coefficients used by erff on two
   different input intervals -- confirm against the erff implementation. */
extern const struct erff_data
{
float erff_poly_A[6];
float erff_poly_B[7];
} __erff_data HIDDEN;
/* Array sizes for the coefficient sets below.  Note that erf_poly_A is
   sized by ERF_POLY_A_NCOEFFS, not by ERF_POLY_A_ORDER. */
#define ERF_POLY_A_ORDER 19
#define ERF_POLY_A_NCOEFFS 10
#define ERFC_POLY_C_NCOEFFS 16
#define ERFC_POLY_D_NCOEFFS 18
#define ERFC_POLY_E_NCOEFFS 14
#define ERFC_POLY_F_NCOEFFS 17
/* Double-precision coefficient arrays, declared here and defined elsewhere.
   NOTE(review): going by the names, these are polynomial coefficients and
   numerator/denominator coefficients of rational approximations for erf and
   erfc on several input intervals -- confirm against their users. */
extern const struct erf_data
{
double erf_poly_A[ERF_POLY_A_NCOEFFS];
double erf_ratio_N_A[5];
double erf_ratio_D_A[5];
double erf_ratio_N_B[7];
double erf_ratio_D_B[6];
double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
} __erf_data HIDDEN;
#endif

View file

@ -0,0 +1,80 @@
/*
* Double-precision math error handling.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
#if WANT_ERRNO
#include <errno.h>
/* Record CODE in errno and hand RESULT back unchanged.  Kept out of line
   (NOINLINE) to reduce code size and to avoid making the math functions
   non-leaf when their error handling is inlined.  */
NOINLINE static double
with_errno (double result, int code)
{
  errno = code;
  return result;
}
#else
/* errno reporting is disabled: evaluate to the result, ignore the code.  */
#define with_errno(value, code) (value)
#endif
/* Common helper for the overflow and underflow results: multiplies Y by
   itself, negating one factor when SIGN is non-zero, and reports ERANGE.
   The callers pass a Y whose square lies outside the double range.
   NOINLINE reduces code size.  */
NOINLINE static double
xflow (uint32_t sign, double y)
{
  double first_factor = sign ? -y : y;
  double product = eval_as_double (opt_barrier_double (first_factor) * y);
  return with_errno (product, ERANGE);
}
/* Return an underflowed result with the requested sign (negative when SIGN
   is non-zero).  xflow multiplies +-0x1p-767 by 0x1p-767; the product has
   magnitude 2^-1534, far below the smallest subnormal double (2^-1074), so
   the multiplication itself underflows.  ERANGE is reported through
   with_errno. */
HIDDEN double
__math_uflow (uint32_t sign)
{
return xflow (sign, 0x1p-767);
}
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
is valid even if the result is non-zero, but in the subnormal range.
The product formed by xflow has magnitude 0x1.8p-538 squared, i.e.
0x1.2p-1075, which lies between half of the smallest subnormal (2^-1075)
and the smallest subnormal (2^-1074); whether it becomes zero therefore
depends on the rounding mode.  A non-zero SIGN gives the negative result. */
HIDDEN double
__math_may_uflow (uint32_t sign)
{
return xflow (sign, 0x1.8p-538);
}
#endif
/* Return an overflowed result with the requested sign (negative when SIGN
   is non-zero).  xflow multiplies +-0x1p769 by 0x1p769; the product 2^1538
   exceeds the double range, so the multiplication itself overflows.
   ERANGE is reported through with_errno. */
HIDDEN double
__math_oflow (uint32_t sign)
{
return xflow (sign, 0x1p769);
}
/* Return the result of a pole error: +1.0 (or -1.0 when SIGN is non-zero)
   divided by 0.0, and report ERANGE through with_errno.
   NOTE(review): opt_barrier_double presumably keeps the compiler from
   folding the division at compile time so that it is performed at run
   time -- confirm in math_config.h. */
HIDDEN double
__math_divzero (uint32_t sign)
{
double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
return with_errno (y, ERANGE);
}
/* Return the result of a domain error for the offending argument X.
   (x - x) / (x - x) is 0/0 for finite x and involves inf - inf for an
   infinite x, so the result is a NaN produced by an invalid operation.
   When X is already a NaN the NaN is simply propagated and errno is left
   alone; otherwise EDOM is reported through with_errno. */
HIDDEN double
__math_invalid (double x)
{
double y = (x - x) / (x - x);
return isnan (x) ? y : with_errno (y, EDOM);
}
/* Inspect an already computed result: a zero is treated as an underflow
   and reported as ERANGE; any other value is returned untouched.  */
HIDDEN double
__math_check_uflow (double y)
{
  if (y == 0.0)
    return with_errno (y, ERANGE);
  return y;
}
/* Inspect an already computed result: an infinity is treated as an overflow
   and reported as ERANGE; any other value is returned untouched.  */
HIDDEN double
__math_check_oflow (double y)
{
  if (isinf (y))
    return with_errno (y, ERANGE);
  return y;
}

View file

@ -0,0 +1,80 @@
/*
* Single-precision math error handling.
*
* Copyright (c) 2017-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
#if WANT_ERRNO
#include <errno.h>
/* Record CODE in errno and hand RESULT back unchanged.  Kept out of line
   (NOINLINE) to reduce code size and to avoid making the math functions
   non-leaf when their error handling is inlined.  */
NOINLINE static float
with_errnof (float result, int code)
{
  errno = code;
  return result;
}
#else
/* errno reporting is disabled: evaluate to the result, ignore the code.  */
#define with_errnof(value, code) (value)
#endif
/* Common helper for the overflow and underflow results: multiplies Y by
   itself, negating one factor when SIGN is non-zero, and reports ERANGE.
   The callers pass a Y whose square lies outside the float range.
   NOINLINE reduces code size.  */
NOINLINE static float
xflowf (uint32_t sign, float y)
{
  float first_factor = sign ? -y : y;
  float product = eval_as_float (opt_barrier_float (first_factor) * y);
  return with_errnof (product, ERANGE);
}
/* Return an underflowed result with the requested sign (negative when SIGN
   is non-zero).  xflowf multiplies +-0x1p-95f by 0x1p-95f; the product has
   magnitude 2^-190, far below the smallest subnormal float (2^-149), so the
   multiplication itself underflows.  ERANGE is reported through
   with_errnof. */
HIDDEN float
__math_uflowf (uint32_t sign)
{
return xflowf (sign, 0x1p-95f);
}
#if WANT_ERRNO_UFLOW
/* Underflows to zero in some non-nearest rounding mode, setting errno
is valid even if the result is non-zero, but in the subnormal range.
The product formed by xflowf has magnitude 0x1.4p-75f squared, i.e.
0x1.9p-150, which lies between half of the smallest subnormal (2^-150)
and the smallest subnormal (2^-149); whether it becomes zero therefore
depends on the rounding mode.  A non-zero SIGN gives the negative result. */
HIDDEN float
__math_may_uflowf (uint32_t sign)
{
return xflowf (sign, 0x1.4p-75f);
}
#endif
/* Return an overflowed result with the requested sign (negative when SIGN
   is non-zero).  xflowf multiplies +-0x1p97f by 0x1p97f; the product 2^194
   exceeds the float range, so the multiplication itself overflows.
   ERANGE is reported through with_errnof. */
HIDDEN float
__math_oflowf (uint32_t sign)
{
return xflowf (sign, 0x1p97f);
}
/* Return the result of a pole error: +1.0f (or -1.0f when SIGN is non-zero)
   divided by 0.0f, and report ERANGE through with_errnof.
   NOTE(review): opt_barrier_float presumably keeps the compiler from
   folding the division at compile time so that it is performed at run
   time -- confirm in math_config.h. */
HIDDEN float
__math_divzerof (uint32_t sign)
{
float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
return with_errnof (y, ERANGE);
}
/* Return the result of a domain error for the offending argument X.
   (x - x) / (x - x) is 0/0 for finite x and involves inf - inf for an
   infinite x, so the result is a NaN produced by an invalid operation.
   When X is already a NaN the NaN is simply propagated and errno is left
   alone; otherwise EDOM is reported through with_errnof. */
HIDDEN float
__math_invalidf (float x)
{
float y = (x - x) / (x - x);
return isnan (x) ? y : with_errnof (y, EDOM);
}
/* Inspect an already computed result: a zero is treated as an underflow
   and reported as ERANGE; any other value is returned untouched.  */
HIDDEN float
__math_check_uflowf (float y)
{
  if (y == 0.0f)
    return with_errnof (y, ERANGE);
  return y;
}
/* Inspect an already computed result: an infinity is treated as an overflow
   and reported as ERANGE; any other value is returned untouched.  */
HIDDEN float
__math_check_oflowf (float y)
{
  if (isinf (y))
    return with_errnof (y, ERANGE);
  return y;
}

View file

@ -0,0 +1,380 @@
/*
* Double-precision x^y function.
*
* Copyright (c) 2018-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/*
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
*/
/* Shorthands used by log_inline for the pow log data: T is the per
   subinterval table, A the polynomial coefficients, Ln2hi/Ln2lo the two
   parts of ln2 and N the number of subintervals.  N and T are redefined
   further down for the exp part. */
#define T __pow_log_data.tab
#define A __pow_log_data.poly
#define Ln2hi __pow_log_data.ln2hi
#define Ln2lo __pow_log_data.ln2lo
#define N (1 << POW_LOG_TABLE_BITS)
/* Bit pattern subtracted from the representation of x in log_inline to
   split x into 2^k * z with z in [OFF, 2*OFF). */
#define OFF 0x3fe6955500000000
/* Return the sign bit and the 11 exponent bits of X, i.e. the top 12 bits
   of its bit representation, right-aligned.  */
static inline uint32_t
top12 (double x)
{
  uint64_t bits = asuint64 (x);
  return (uint32_t) (bits >> 52);
}
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent.
Returns y; the low-order correction is stored through TAIL. */
static inline double_t
log_inline (uint64_t ix, double_t *tail)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
/* Table index: the POW_LOG_TABLE_BITS bits just below the exponent field. */
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
/* Take k back out of the exponent field so that iz is the bit pattern
of z. */
iz = ix - (tmp & 0xfffULL << 52);
z = asdouble (iz);
kd = (double_t) k;
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc;
logc = T[i].logc;
logctail = T[i].logctail;
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
#if HAVE_FAST_FMA
r = fma (z, invc, -1.0);
#else
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
double_t zlo = z - zhi;
double_t rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc;
r = rhi + rlo;
#endif
/* k*Ln2 + log(c) + r. */
t1 = kd * Ln2hi + logc;
t2 = t1 + r;
/* lo1 collects the low parts of k*ln2 and log(c); lo2 is the rounding
error of the sum t1 + r. */
lo1 = kd * Ln2lo + logctail;
lo2 = t1 - t2 + r;
/* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar;
ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */
#if HAVE_FAST_FMA
hi = t2 + ar2;
/* lo3 is the rounding error of the product ar*r, lo4 that of the sum
t2 + ar2. */
lo3 = fma (ar, r, -ar2);
lo4 = t2 - hi + ar2;
#else
/* Without fma only the product formed from the exact high part rhi goes
into hi; the part involving rlo is accounted for in lo3. */
double_t arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi;
hi = t2 + arhi2;
lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2;
#endif
/* p = log1p(r) - r - A[0]*r*r. */
#if POW_LOG_POLY_ORDER == 8
p = (ar3
* (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
#endif
/* Sum all low-order terms, then renormalize into the result y and the
tail. */
lo = lo1 + lo2 + lo3 + lo4 + p;
y = hi + lo;
*tail = hi - y + lo;
return y;
}
/* The log part is done: rebind N and T to the exp data and add shorthands
   for the constants used by exp_inline. */
#undef N
#undef T
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
#define T __exp_data.tab
/* __exp_data.poly holds only the last four coefficients, so the index of
   Cn depends on EXP_POLY_ORDER: for order 5 C2..C5 map to poly[0..3].  A
   macro whose index would fall outside poly (e.g. C6 for order 5) is only
   referenced by a polynomial variant that is not compiled for that order. */
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow.
Returns the final result after passing it through check_oflow or
check_uflow. */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
/* Bit 31 of ki is the sign of k seen as a 32-bit integer. */
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by <= 460. */
/* Lower the exponent by 1009 so scale is a valid double, then multiply
the factor 2^1009 back in at the end. */
sbits -= 1009ull << 52;
scale = asdouble (sbits);
y = 0x1p1009 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
/* Raise the exponent by 1022; the factor 2^-1022 is applied at the end. */
sbits += 1022ull << 52;
/* Note: sbits is signed scale. */
scale = asdouble (sbits);
y = scale + scale * tmp;
if (fabs (y) < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo, one = 1.0;
if (y < 0.0)
one = -1.0;
/* lo is the rounding error of y; adding +-1 to y and subtracting it again
performs the rounding at the desired bit position. */
lo = scale - y + scale * tmp;
hi = one + y;
lo = one - hi + y + lo;
y = eval_as_double (hi + lo) - one;
/* Fix the sign of 0. */
if (y == 0.0)
y = asdouble (sbits & 0x8000000000000000);
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}
/* Added to ki before it is shifted left by 52 - EXP_TABLE_BITS, so the
   0x800 part ends up in bit 63, the sign bit of the scale. */
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.
Results that certainly overflow or underflow are produced by __math_oflow
and __math_uflow; borderline ones are finished by specialcase. */
static inline double
exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
/* Biased exponent of x with the sign removed. */
abstop = top12 (x) & 0x7ff;
/* One unsigned compare catches both |x| < 2^-54 (the subtraction wraps
around) and |x| >= 512. */
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
if (abstop - top12 (0x1p-54) >= 0x80000000)
{
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
return sign_bias ? -one : one;
}
if (abstop >= top12 (1024.0))
{
/* Note: inf and nan are already handled. */
if (asuint64 (x) >> 63)
return __math_uflow (sign_bias);
else
return __math_oflow (sign_bias);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd) >> 16;
kd = (double_t) (int32_t) ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
/* The table stores two words per entry: the tail at idx and the scale
bits at idx + 1. */
idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
/* abstop was cleared above for large |x|: scale may not be a valid double
there, so the result is assembled by specialcase. */
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}
/* Classify the double whose bit representation is IY (a non-zero finite
   value): 0 means it is not an integer, 1 an odd integer, 2 an even
   integer.  */
static inline int
checkint (uint64_t iy)
{
  const int bias = 0x3ff;
  const int mantissa_bits = 52;
  int exponent = (int) ((iy >> mantissa_bits) & 0x7ff);

  /* Magnitude below 1: cannot be an integer.  */
  if (exponent < bias)
    return 0;
  /* Magnitude of at least 2^53: every such value is an even integer.  */
  if (exponent > bias + mantissa_bits)
    return 2;

  /* Bit of the representation that carries the weight 1.  */
  uint64_t unit = 1ULL << (bias + mantissa_bits - exponent);
  /* Any bit below the unit bit is a fractional part.  */
  if ((iy & (unit - 1)) != 0)
    return 0;
  return (iy & unit) != 0 ? 1 : 2;
}
/* Return 1 when I is the bit representation of a zero, an infinity or a
   NaN (of either sign), and 0 otherwise.  */
static inline int
zeroinfnan (uint64_t i)
{
  /* Doubling shifts the sign bit out; subtracting 1 wraps a zero around to
     the largest value, so a single unsigned compare covers all three.  */
  uint64_t doubled = 2 * i;
  uint64_t inf_doubled = 2 * asuint64 (INFINITY);
  return doubled - 1 >= inf_doubled - 1;
}
/* Compute x raised to the power y in double precision.
Unusual inputs (zero, infinity, NaN, negative or subnormal x, and tiny or
huge |y|) are sorted out in the unlikely branch below.  Every other input
is evaluated as exp (y * log (x)), where log (x) is carried as a hi/lo
pair so that the product keeps extra precision.  Errors are produced by
the __math_* helpers. */
double
pow (double x, double y)
{
uint32_t sign_bias = 0;
uint64_t ix, iy;
uint32_t topx, topy;
ix = asuint64 (x);
iy = asuint64 (y);
topx = top12 (x);
topy = top12 (y);
/* topx - 1 wraps around for topx == 0, so the first test is true for zero
and subnormal x, for inf/nan and for any x with the sign bit set. */
if (unlikely (topx - 0x001 >= 0x7ff - 0x001
|| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
{
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
/* Special cases: (x < 0x1p-1022 or inf or nan) or
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
if (unlikely (zeroinfnan (iy)))
{
/* y is zero: the result is 1 unless x is a signaling NaN. */
if (2 * iy == 0)
return issignaling_inline (x) ? x + y : 1.0;
/* x is exactly +1: the result is 1 unless y is a signaling NaN. */
if (ix == asuint64 (1.0))
return issignaling_inline (y) ? x + y : 1.0;
/* Either argument is a NaN: propagate it. */
if (2 * ix > 2 * asuint64 (INFINITY)
|| 2 * iy > 2 * asuint64 (INFINITY))
return x + y;
/* Here y is +-inf.  |x| == 1 gives 1. */
if (2 * ix == 2 * asuint64 (1.0))
return 1.0;
if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (unlikely (zeroinfnan (ix)))
{
/* x is zero or infinite (a NaN x with finite non-zero y also gets here
and simply propagates through x * x). */
double_t x2 = x * x;
/* A negative base with an odd integer exponent gives a negative result. */
if (ix >> 63 && checkint (iy) == 1)
{
x2 = -x2;
sign_bias = 1;
}
/* Zero raised to a negative power is a pole error. */
if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
return __math_divzero (sign_bias);
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalid (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
/* Continue with |x|; the sign of the result is carried by sign_bias. */
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
{
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64 (1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be)
{
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
else
return 1.0;
}
/* |y| >= 2^63: overflow when |x| > 1 and y > 0 or when |x| < 1 and y < 0,
underflow otherwise. */
return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
: __math_uflow (0);
}
if (topx == 0)
{
/* Normalize subnormal x so exponent becomes negative. */
/* Without the barrier some versions of clang evaluate the mul
unconditionally causing spurious overflow exceptions. */
ix = asuint64 (opt_barrier_double (x) * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
}
/* Main path: log (x) as hi + lo, then the product y * log (x) as
ehi + elo, then exp of that sum. */
double_t lo;
double_t hi = log_inline (ix, &lo);
double_t ehi, elo;
#if HAVE_FAST_FMA
ehi = y * hi;
elo = y * lo + fma (y, hi, -ehi);
#else
/* Without fma, clear the low 27 bits of y and of hi so that the product of
the two high parts can be formed without rounding error. */
double_t yhi = asdouble (iy & -1ULL << 27);
double_t ylo = y - yhi;
double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
double_t llo = hi - lhi + lo;
ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
#endif
return exp_inline (ehi, elo, sign_bias);
}
/* When built for the glibc ABI, additionally provide pow under the names
   __pow_finite and __ieee754_pow.  If long double has the same 53-bit
   mantissa as double, powl is provided as a thin wrapper around pow.
   NOTE(review): strong_alias and hidden_alias are defined outside this
   file; presumably they emit symbol aliases -- confirm in math_config.h. */
#if USE_GLIBC_ABI
strong_alias (pow, __pow_finite)
hidden_alias (pow, __ieee754_pow)
# if LDBL_MANT_DIG == 53
long double powl (long double x, long double y) { return pow (x, y); }
# endif
#endif

View file

@ -0,0 +1,184 @@
/*
* Data for the log part of pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
#define N (1 << POW_LOG_TABLE_BITS)
const struct pow_log_data __pow_log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly = {
#if N == 128 && POW_LOG_POLY_ORDER == 8
// relative error: 0x1.11922ap-70
// in -0x1.6bp-8 0x1.6bp-8
// Coefficients are scaled to match the scaling during evaluation.
-0x1p-1,
0x1.555555555556p-2 * -2,
-0x1.0000000000006p-2 * -2,
0x1.999999959554ep-3 * 4,
-0x1.555555529a47ap-3 * 4,
0x1.2495b9b4845e9p-3 * -8,
-0x1.0002b8b263fc3p-3 * -8,
#endif
},
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + log(z/c)
log(z/c) = poly(z/c - 1)
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
and z falls into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = round(0x1p43*log(c))/0x1p43
tab[i].logctail = (double)(log(c) - logc)
where c is chosen near the center of the subinterval such that 1/c has only a
few precision bits so z/c - 1 is exactly representable as double:
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
error and the interval for z is selected such that near x == 1, where log(x)
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
.tab = {
#if N == 128
#define A(a, b, c) {a, 0, b, c},
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
#endif
},
};

View file

@ -0,0 +1,221 @@
/*
* Single-precision pow function.
*
* Copyright (c) 2017-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "math_config.h"
/*
POWF_LOG2_POLY_ORDER = 5
EXP2F_TABLE_BITS = 5
ULP error: 0.82 (~ 0.5 + relerr*2^24)
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
*/
/* Shorthands for the log2 step of powf: N is the number of subintervals,
   T the per subinterval table, A the polynomial coefficients and OFF the
   bit pattern subtracted from ix to split x into 2^k * z.  N and T are
   redefined further down for the exp2 step. */
#define N (1 << POWF_LOG2_TABLE_BITS)
#define T __powf_log2_data.tab
#define A __powf_log2_data.poly
#define OFF 0x3f330000
/* Subnormal input is normalized so ix has negative biased exponent.
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set.
Returns log2 of the float whose bit representation is IX, computed in
double_t. */
static inline double_t
log2_inline (uint32_t ix)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
uint32_t iz, top, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
/* Table index: the POWF_LOG2_TABLE_BITS bits just below the exponent
field. */
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
/* top keeps the sign and exponent bits of tmp; removing it from ix leaves
the bit pattern of z. */
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t) asfloat (iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t) k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
/* The value formed is
A[0]*r^5 + A[1]*r^4 + A[2]*r^3 + A[3]*r^2 + A[4]*r + y0. */
r2 = r * r;
y = A[0] * r + A[1];
p = A[2] * r + A[3];
r4 = r2 * r2;
q = A[4] * r + y0;
q = p * r2 + q;
y = y * r4 + q;
return y;
}
/* The log2 step is done: rebind N and T to the exp2f data. */
#undef N
#undef T
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
/* Added to ki before it is shifted left by 52 - EXP2F_TABLE_BITS, so this
   bit ends up in bit 63, the sign bit of the double-precision scale. */
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
/* The output of log2 and thus the input of exp2 is either scaled by N
(in case of fast toint intrinsics) or not. The unscaled xd must be
in [-1021,1023], sign_bias sets the sign of the result.
Returns the result rounded to float. */
static inline float
exp2_inline (double_t xd, uint32_t sign_bias)
{
uint64_t ki, ski, t;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, y, s;
#if TOINT_INTRINSICS
# define C __exp2f_data.poly_scaled
/* N*x = k + r with r in [-1/2, 1/2] */
kd = roundtoint (xd); /* k */
ki = converttoint (xd);
#else
# define C __exp2f_data.poly
# define SHIFT __exp2f_data.shift_scaled
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
/* Adding SHIFT rounds xd; the integer k is then read from the low bits of
the bit representation of the sum. */
kd = eval_as_double (xd + SHIFT);
ki = asuint64 (kd);
kd -= SHIFT; /* k/N */
#endif
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
/* The table entry selected by the low bits of k, plus k (and the sign
bias) shifted into the exponent and sign fields, gives the bit pattern
of the scale s. */
t = T[ki % N];
ski = ki + sign_bias;
t += ski << (52 - EXP2F_TABLE_BITS);
s = asdouble (t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float (y);
}
/* Classify the float whose bit representation is IY (a non-zero finite
   value): 0 means it is not an integer, 1 an odd integer, 2 an even
   integer.  */
static inline int
checkint (uint32_t iy)
{
  const int bias = 0x7f;
  const int mantissa_bits = 23;
  int exponent = (int) ((iy >> mantissa_bits) & 0xff);

  /* Magnitude below 1: cannot be an integer.  */
  if (exponent < bias)
    return 0;
  /* Magnitude of at least 2^24: every such value is an even integer.  */
  if (exponent > bias + mantissa_bits)
    return 2;

  /* Bit of the representation that carries the weight 1.  */
  uint32_t unit = 1u << (bias + mantissa_bits - exponent);
  /* Any bit below the unit bit is a fractional part.  */
  if ((iy & (unit - 1)) != 0)
    return 0;
  return (iy & unit) != 0 ? 1 : 2;
}
/* Return 1 when IX is the bit representation of a zero, an infinity or a
   NaN (of either sign), and 0 otherwise.  */
static inline int
zeroinfnan (uint32_t ix)
{
  /* Doubling shifts the sign bit out; subtracting 1 wraps a zero around to
     the largest value, so a single unsigned compare covers all three.  */
  const uint32_t inf_doubled = 2u * 0x7f800000;
  uint32_t doubled = 2 * ix;
  return doubled - 1 >= inf_doubled - 1;
}
/* Compute x raised to the power y in single precision.
Unusual inputs (zero, infinity, NaN, negative or subnormal x, and zero,
infinite or NaN y) are sorted out in the unlikely branch below.  Every
other input is evaluated in double_t as exp2 (y * log2 (x)), with
results outside the float range diverted to the __math_* helpers. */
float
powf (float x, float y)
{
uint32_t sign_bias = 0;
uint32_t ix, iy;
ix = asuint (x);
iy = asuint (y);
/* The subtraction wraps around for ix < 0x00800000, so the first test is
true for zero and subnormal x, for inf/nan and for any x with the sign
bit set. */
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
{
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (unlikely (zeroinfnan (iy)))
{
/* y is zero: the result is 1 unless x is a signaling NaN. */
if (2 * iy == 0)
return issignalingf_inline (x) ? x + y : 1.0f;
/* x is exactly +1: the result is 1 unless y is a signaling NaN. */
if (ix == 0x3f800000)
return issignalingf_inline (y) ? x + y : 1.0f;
/* Either argument is a NaN: propagate it. */
if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
return x + y;
/* Here y is +-inf.  |x| == 1 gives 1. */
if (2 * ix == 2 * 0x3f800000)
return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (unlikely (zeroinfnan (ix)))
{
/* x is zero or infinite (a NaN x with finite non-zero y also gets here
and simply propagates through x * x). */
float_t x2 = x * x;
/* A negative base with an odd integer exponent gives a negative result. */
if (ix & 0x80000000 && checkint (iy) == 1)
{
x2 = -x2;
sign_bias = 1;
}
#if WANT_ERRNO
/* Zero raised to a negative power is a pole error. */
if (2 * ix == 0 && iy & 0x80000000)
return __math_divzerof (sign_bias);
#endif
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
}
/* x and y are non-zero finite. */
if (ix & 0x80000000)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalidf (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
/* Continue with |x|; the sign of the result is carried by sign_bias. */
ix &= 0x7fffffff;
}
if (ix < 0x00800000)
{
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint (x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
}
double_t logx = log2_inline (ix);
double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */
/* Compare the exponent and top mantissa bits of |ylogx| (sign shifted out
by the 16-bit mask) against those of 126 * POWF_SCALE. */
if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
>= asuint64 (126.0 * POWF_SCALE) >> 47))
{
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
/* |x^y| > 0x1.ffffffp127. */
return __math_oflowf (sign_bias);
if (WANT_ROUNDING && WANT_ERRNO
&& ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
/* |x^y| > 0x1.fffffep127, check if we round away from 0. */
if ((!sign_bias
&& eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
|| (sign_bias
&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
!= -1.0f))
return __math_oflowf (sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf (sign_bias);
#if WANT_ERRNO_UFLOW
if (ylogx < -149.0 * POWF_SCALE)
return __math_may_uflowf (sign_bias);
#endif
}
return exp2_inline (ylogx, sign_bias);
}
/* When built for the glibc ABI, additionally provide powf under the names
   __powf_finite and __ieee754_powf.
   NOTE(review): strong_alias and hidden_alias are defined outside this
   file; presumably they emit symbol aliases -- confirm in math_config.h. */
#if USE_GLIBC_ABI
strong_alias (powf, __powf_finite)
hidden_alias (powf, __ieee754_powf)
#endif

View file

@ -0,0 +1,34 @@
/*
* Data definition for powf.
*
* Copyright (c) 2017-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "math_config.h"
/* Lookup table and polynomial coefficients for the log2 step of powf.
Every logarithm value and every polynomial coefficient is multiplied by
POWF_SCALE.
NOTE(review): struct powf_log2_data and POWF_SCALE are declared outside this
file (presumably in math_config.h) - confirm the member names there. */
const struct powf_log2_data __powf_log2_data = {
/* 16 pairs { r, -log2 (r) * POWF_SCALE }: the second member is the negated
base-2 logarithm of the first (r = 1 maps to 0). */
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
},
/* Five polynomial coefficients. The last one is approximately 1/ln(2).
NOTE(review): the coefficient order (highest degree first?) is not visible
here - confirm against the code that evaluates the polynomial. */
.poly = {
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
0x1.71547652ab82bp0 * POWF_SCALE,
}
};

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_cos.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_cosf.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_exp.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_exp2f.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_exp2f_1u.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_expf.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_expf_1u.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_log.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_logf.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_pow.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_powf.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_sin.c"

View file

@ -0,0 +1,6 @@
/*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#define SCALAR 1
#include "v_sinf.c"

View file

@ -0,0 +1,79 @@
/*
* Single-precision sin/cos function.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include <math.h>
#include "math_config.h"
#include "sincosf.h"
/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative
error is 0.5303 * 2^-23. A single-step range reduction is used for
small values. Large inputs have their range reduced using fast integer
arithmetic.

Stores the sine of Y in *SINP and the cosine of Y in *COSP. The input is
classified by comparing abstop12 () values (leading bits of the float
representation with the sign cleared) against these thresholds:
below PI/4: the polynomials are evaluated directly;
below 120.0: reduce_fast () is applied first;
below INFINITY: reduce_large () is applied first;
otherwise (Inf or NaN): both results are set to y - y. */
void
sincosf (float y, float *sinp, float *cosp)
{
/* All arithmetic is carried out in double on the promoted input. */
double x = y;
double s;
int n;
const sincos_t *p = &__sincosf_table[0];
if (abstop12 (y) < abstop12 (pio4))
{
double x2 = x * x;
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
{
/* Below the 2^-12 threshold the results are simply y and 1. */
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */
force_eval_float (x2);
*sinp = y;
*cosp = 1.0f;
return;
}
/* Quadrant 0: no swap of the sine and cosine outputs. */
sincosf_poly (x, x2, p, 0, sinp, cosp);
}
else if (abstop12 (y) < abstop12 (120.0f))
{
/* x becomes the reduced argument, n the quadrant. */
x = reduce_fast (x, p, &n);
/* Setup the signs for sin and cos. */
s = p->sign[n & 3];
/* Table entry 1 holds the negated cosine polynomial. */
if (n & 2)
p = &__sincosf_table[1];
sincosf_poly (x * s, x * x, p, n, sinp, cosp);
}
else if (likely (abstop12 (y) < abstop12 (INFINITY)))
{
uint32_t xi = asuint (y);
/* reduce_large () ignores the sign bit, so it is extracted here and folded
into the quadrant used for the sign and table selection below. */
int sign = xi >> 31;
x = reduce_large (xi, &n);
/* Setup signs for sin and cos - include original sign. */
s = p->sign[(n + sign) & 3];
if ((n + sign) & 2)
p = &__sincosf_table[1];
sincosf_poly (x * s, x * x, p, n, sinp, cosp);
}
else
{
/* Return NaN if Inf or NaN for both sin and cos. */
*sinp = *cosp = y - y;
#if WANT_ERRNO
/* Needed to set errno for +-Inf, the add is a hack to work
around a gcc register allocation issue: just passing y
affects code generation in the fast path. */
__math_invalidf (y + y);
#endif
}
}

View file

@ -0,0 +1,153 @@
/*
* Header for sinf, cosf and sincosf.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include <math.h>
#include "math_config.h"
/* 2PI * 2^-64. */
static const double pi63 = 0x1.921FB54442D18p-62;
/* PI / 4. */
static const double pio4 = 0x1.921FB54442D18p-1;
/* Constants and polynomial coefficients shared by sinf, cosf and sincosf.
   The member order is part of the interface: the table in the data file is
   initialised positionally.  */
typedef struct
{
  /* Sign of sine in quadrants 0..3.  */
  double sign[4];
  /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS).  */
  double hpi_inv;
  /* PI / 2.  */
  double hpi;
  /* Cosine polynomial: c0 + c1*x^2 + c2*x^4 + c3*x^6 + c4*x^8.  */
  double c0;
  double c1;
  double c2;
  double c3;
  double c4;
  /* Sine polynomial: x + s1*x^3 + s2*x^5 + s3*x^7.  */
  double s1;
  double s2;
  double s3;
} sincos_t;
/* Polynomial data (the cosine polynomial is negated in the 2nd entry). */
extern const sincos_t __sincosf_table[2] HIDDEN;
/* Table with 4/PI to 192 bit precision. */
extern const uint32_t __inv_pio4[] HIDDEN;
/* Top 12 bits of the float representation with the sign bit cleared, i.e.
   bits 20..30 of X: the 8 exponent bits followed by the 3 leading mantissa
   bits.  Comparing two such values orders |x| at that granularity.  */
static inline uint32_t
abstop12 (float x)
{
  const uint32_t bits = asuint (x);
  const uint32_t top = bits >> 20;

  return top & 0x7ff;
}
/* Evaluate the sine and cosine polynomials of P at X, where X2 must be
   X squared, and store the results through SINP and COSP.  N is the
   quadrant: when it is odd the two destinations are exchanged, so the sine
   polynomial is written to *COSP and the cosine polynomial to *SINP.  */
static inline void
sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp,
              float *cosp)
{
  /* Pick the destinations first; odd quadrants swap them.  */
  float *sin_out = sinp;
  float *cos_out = cosp;
  if (n & 1)
    {
      sin_out = cosp;
      cos_out = sinp;
    }

  /* Powers of x needed by both polynomials.  */
  const double x3 = x2 * x;
  const double x4 = x2 * x2;
  const double x5 = x3 * x2;
  const double x6 = x4 * x2;

  /* High-order and low-order partial sums.  */
  const double sin_hi = p->s2 + x2 * p->s3;
  const double cos_hi = p->c3 + x2 * p->c4;
  const double cos_lo = p->c0 + x2 * p->c1;
  const double sin_lo = x + x3 * p->s1;
  const double cos_mid = cos_lo + x4 * p->c2;

  *sin_out = sin_lo + x5 * sin_hi;
  *cos_out = cos_mid + x6 * cos_hi;
}
/* Evaluate one polynomial of P at X, where X2 must be X squared, and return
   it as a float.  N is the quadrant: even quadrants use the sine
   polynomial, odd quadrants use the cosine polynomial.  */
static inline float
sinf_poly (double x, double x2, const sincos_t *p, int n)
{
  if (n & 1)
    {
      /* Cosine: c0 + c1*x^2 + c2*x^4 + x^6 * (c3 + c4*x^2).  */
      const double x4 = x2 * x2;
      const double hi = p->c3 + x2 * p->c4;
      const double lo = p->c0 + x2 * p->c1;
      const double x6 = x4 * x2;
      const double mid = lo + x4 * p->c2;
      return mid + x6 * hi;
    }

  /* Sine: x + s1*x^3 + x^5 * (s2 + s3*x^2).  */
  const double x3 = x * x2;
  const double hi = p->s2 + x2 * p->s3;
  const double x5 = x3 * x2;
  const double lo = x + x3 * p->s1;
  return lo + x5 * hi;
}
/* Fast range reduction using single multiply-subtract. Return the modulo of
X as a value between -PI/4 and PI/4 and store the quadrant in NP.
The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double
is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
the result is accurate for |X| <= 120.0.

Two variants are selected at compile time by TOINT_INTRINSICS; they expect
differently scaled P->hpi_inv values (see the comment on that member). */
static inline double
reduce_fast (double x, const sincos_t *p, int *np)
{
double r;
#if TOINT_INTRINSICS
/* Use fast round and lround instructions when available. */
r = x * p->hpi_inv;
*np = converttoint (r);
return x - roundtoint (r) * p->hpi;
#else
/* Use scaled float to int conversion with explicit rounding.
hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
This avoids inaccuracies introduced by truncating negative values. */
r = x * p->hpi_inv;
/* Adding 0x800000 (half of 2^24) before the shift rounds to nearest. */
int n = ((int32_t)r + 0x800000) >> 24;
*np = n;
return x - n * p->hpi;
#endif
}
/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
multiply computes the exact 2.62-bit fixed-point modulo. Since the result
can have at most 29 leading zeros after the binary point, the double
precision result is accurate to 33 bits. */
static inline double
reduce_large (uint32_t xi, int *np)
{
/* Bits 26..29 of XI select the starting table entry. The highest entry
read below is arr[8], so the largest index used is 15 + 8 = 23. */
const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
/* Bits 23..25 of XI give the remaining shift applied to the mantissa. */
int shift = (xi >> 23) & 7;
uint64_t n, res0, res1, res2;
/* Keep the mantissa and set bit 23 (the implicit leading one). */
xi = (xi & 0xffffff) | 0x800000;
xi <<= shift;
/* Deliberately a 32-bit multiply: only the low 32 bits of this product are
kept, because res0 is moved into the upper half of the sum below. */
res0 = xi * arr[0];
/* arr[4] and arr[8] are the table words 32 and 64 bits further on (each
table entry advances by 8 bits). */
res1 = (uint64_t)xi * arr[4];
res2 = (uint64_t)xi * arr[8];
res0 = (res2 >> 32) | (res0 << 32);
res0 += res1;
/* Round to the nearest quadrant: the top two bits after adding half. */
n = (res0 + (1ULL << 61)) >> 62;
/* Subtract the quadrant; the remainder is then read as a signed value. */
res0 -= n << 62;
double x = (int64_t)res0;
*np = n;
return x * pi63;
}

View file

@ -0,0 +1,63 @@
/*
* Data definition for sinf, cosf and sincosf.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include <math.h>
#include "math_config.h"
#include "sincosf.h"
/* The constants and polynomials for sine and cosine. The 2nd entry
computes -cos (x) rather than cos (x) to get negation for free.
The initialisers are positional and follow the member order of sincos_t:
sign[4], hpi_inv, hpi, c0..c4, s1..s3. The two entries differ only in the
sign of the five cosine coefficients. */
const sincos_t __sincosf_table[2] =
{
{
/* sign[] */
{ 1.0, -1.0, -1.0, 1.0 },
/* hpi_inv: prescaled by 2^24 when TOINT_INTRINSICS is not set. */
#if TOINT_INTRINSICS
0x1.45F306DC9C883p-1,
#else
0x1.45F306DC9C883p+23,
#endif
/* hpi */
0x1.921FB54442D18p0,
/* c0 .. c4 */
0x1p0,
-0x1.ffffffd0c621cp-2,
0x1.55553e1068f19p-5,
-0x1.6c087e89a359dp-10,
0x1.99343027bf8c3p-16,
/* s1 .. s3 */
-0x1.555545995a603p-3,
0x1.1107605230bc4p-7,
-0x1.994eb3774cf24p-13
},
{
/* sign[] */
{ 1.0, -1.0, -1.0, 1.0 },
/* hpi_inv */
#if TOINT_INTRINSICS
0x1.45F306DC9C883p-1,
#else
0x1.45F306DC9C883p+23,
#endif
/* hpi */
0x1.921FB54442D18p0,
/* c0 .. c4, negated relative to the first entry */
-0x1p0,
0x1.ffffffd0c621cp-2,
-0x1.55553e1068f19p-5,
0x1.6c087e89a359dp-10,
-0x1.99343027bf8c3p-16,
/* s1 .. s3, identical to the first entry */
-0x1.555545995a603p-3,
0x1.1107605230bc4p-7,
-0x1.994eb3774cf24p-13
}
};
/* Table with 4/PI to 192 bit precision.  To avoid unaligned accesses
   only 8 new bits are added per entry, making the table 4 times larger.
   Entry i therefore equals entry i-1 shifted left by one byte with the next
   byte of 4/PI appended, so entries i, i+4 and i+8 are three consecutive
   32-bit words of the expansion.  */
const uint32_t __inv_pio4[24] =
{
  /*  0.. 2 */ 0xa2,       0xa2f9,     0xa2f983,
  /*  3.. 5 */ 0xa2f9836e, 0xf9836e4e, 0x836e4e44,
  /*  6.. 8 */ 0x6e4e4415, 0x4e441529, 0x441529fc,
  /*  9..11 */ 0x1529fc27, 0x29fc2757, 0xfc2757d1,
  /* 12..14 */ 0x2757d1f5, 0x57d1f534, 0xd1f534dd,
  /* 15..17 */ 0xf534ddc0, 0x34ddc0db, 0xddc0db62,
  /* 18..20 */ 0xc0db6295, 0xdb629599, 0x6295993c,
  /* 21..23 */ 0x95993c43, 0x993c4390, 0x3c439041
};

View file

@ -0,0 +1,67 @@
/*
* Single-precision sin function.
*
* Copyright (c) 2018-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include "math_config.h"
#include "sincosf.h"
/* Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative
error is 0.5303 * 2^-23. A single-step range reduction is used for
small values. Large inputs have their range reduced using fast integer
arithmetic.

Returns the sine of Y. The input is classified by comparing abstop12 ()
values (leading bits of the float representation with the sign cleared):
below PI/4 the sine polynomial is evaluated directly, below 120.0
reduce_fast () is used, below INFINITY reduce_large () is used, and Inf or
NaN inputs return the result of __math_invalidf (y). */
float
sinf (float y)
{
/* All arithmetic is carried out in double on the promoted input. */
double x = y;
/* s first holds x squared, later the sign factor for the quadrant. */
double s;
int n;
const sincos_t *p = &__sincosf_table[0];
if (abstop12 (y) < abstop12 (pio4))
{
s = x * x;
if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
{
/* Below the 2^-12 threshold the input itself is returned. */
if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
/* Force underflow for tiny y. */
force_eval_float (s);
return y;
}
return sinf_poly (x, s, p, 0);
}
else if (likely (abstop12 (y) < abstop12 (120.0f)))
{
/* x becomes the reduced argument, n the quadrant. */
x = reduce_fast (x, p, &n);
/* Setup the signs for sin and cos. */
s = p->sign[n & 3];
/* Table entry 1 holds the negated cosine polynomial. */
if (n & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n);
}
else if (abstop12 (y) < abstop12 (INFINITY))
{
uint32_t xi = asuint (y);
/* reduce_large () ignores the sign bit, so it is folded into the quadrant
used for the sign and table selection below. */
int sign = xi >> 31;
x = reduce_large (xi, &n);
/* Setup signs for sin and cos - include original sign. */
s = p->sign[(n + sign) & 3];
if ((n + sign) & 2)
p = &__sincosf_table[1];
return sinf_poly (x * s, x * x, p, n);
}
else
return __math_invalidf (y);
}

View file

@ -0,0 +1,773 @@
/*
* Microbenchmark for math functions.
*
* Copyright (c) 2018-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#undef _GNU_SOURCE
#define _GNU_SOURCE 1
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include "mathlib.h"
#ifndef WANT_VMATH
/* Enable the build of vector math code. */
# define WANT_VMATH 1
#endif
/* Number of measurements, best result is reported. */
#define MEASURE 60
/* Array size. */
#define N 8000
/* Iterations over the array. */
#define ITER 125
static double *Trace;
static size_t trace_size;
static double A[N];
static float Af[N];
static long measurecount = MEASURE;
static long itercount = ITER;
/* Vector type shim. On AArch64 with WANT_VMATH the benchmark uses
two-lane double and four-lane float vectors; elsewhere scalar stand-ins
with a length of 1 are provided so that the vector runners still compile. */
#if __aarch64__ && WANT_VMATH
typedef __f64x2_t v_double;
#define v_double_len() 2
/* Build a vector from two consecutive doubles starting at P. */
static inline v_double
v_double_load (const double *p)
{
return (v_double){p[0], p[1]};
}
/* Build a vector with X in every lane. */
static inline v_double
v_double_dup (double x)
{
return (v_double){x, x};
}
typedef __f32x4_t v_float;
#define v_float_len() 4
/* Build a vector from four consecutive floats starting at P. */
static inline v_float
v_float_load (const float *p)
{
return (v_float){p[0], p[1], p[2], p[3]};
}
/* Build a vector with X in every lane. */
static inline v_float
v_float_dup (float x)
{
return (v_float){x, x, x, x};
}
#else
/* dummy definitions to make things compile. */
typedef double v_double;
typedef float v_float;
/* The *_len macros here declare one parameter, while callers invoke them
with an empty argument list, which is accepted as a single empty argument. */
#define v_double_len(x) 1
#define v_double_load(x) (x)[0]
#define v_double_dup(x) (x)
#define v_float_len(x) 1
#define v_float_load(x) (x)[0]
#define v_float_dup(x) (x)
#endif
/* Identity function for doubles; benchmarked under the name "dummy" in
funtab. */
static double
dummy (double x)
{
return x;
}
/* Single-precision counterpart of dummy. */
static float
dummyf (float x)
{
return x;
}
/* Wrappers that are only built when the vector math code is enabled.
The xy* functions adapt the two-argument pow variants to the one-argument
function-pointer slots of funtab by passing X as both base and exponent.
NOTE(review): __vpcs is presumably a macro for the AArch64 vector procedure
call standard attribute, defined elsewhere - confirm. */
#if WANT_VMATH
#if __aarch64__
/* Identity functions for the vector types. */
static v_double
__v_dummy (v_double x)
{
return x;
}
static v_float
__v_dummyf (v_float x)
{
return x;
}
#ifdef __vpcs
/* Identity functions declared with __vpcs. */
__vpcs static v_double
__vn_dummy (v_double x)
{
return x;
}
__vpcs static v_float
__vn_dummyf (v_float x)
{
return x;
}
/* pow (x, x) adapters for the __vn_ and _ZGV entry points. */
__vpcs static v_float
xy__vn_powf (v_float x)
{
return __vn_powf (x, x);
}
__vpcs static v_float
xy_Z_powf (v_float x)
{
return _ZGVnN4vv_powf (x, x);
}
__vpcs static v_double
xy__vn_pow (v_double x)
{
return __vn_pow (x, x);
}
__vpcs static v_double
xy_Z_pow (v_double x)
{
return _ZGVnN2vv_pow (x, x);
}
#endif
/* pow (x, x) adapters for the __v_ entry points. */
static v_float
xy__v_powf (v_float x)
{
return __v_powf (x, x);
}
static v_double
xy__v_pow (v_double x)
{
return __v_pow (x, x);
}
#endif
/* pow (x, x) adapters for the scalar __s_ entry points; built on every
architecture when WANT_VMATH is set. */
static float
xy__s_powf (float x)
{
return __s_powf (x, x);
}
static double
xy__s_pow (double x)
{
return __s_pow (x, x);
}
#endif
/* One-argument adapters around pow/powf so that they fit the function
pointer slots of funtab. */
/* Sample used as both base and exponent. */
static double
xypow (double x)
{
return pow (x, x);
}
static float
xypowf (float x)
{
return powf (x, x);
}
/* Sample used as the base, fixed exponent 23.4. */
static double
xpow (double x)
{
return pow (x, 23.4);
}
static float
xpowf (float x)
{
return powf (x, 23.4f);
}
/* Fixed base 2.34, sample used as the exponent. */
static double
ypow (double x)
{
return pow (2.34, x);
}
static float
ypowf (float x)
{
return powf (2.34f, x);
}
static float
sincosf_wrap (float x)
{
float s, c;
sincosf (x, &s, &c);
return s + c;
}
/* Table of benchmarkable functions, terminated by an all-zero entry (code
that walks the table stops at the first entry whose name is null). */
static const struct fun
{
/* Name matched against the command line and printed in the results. */
const char *name;
/* Precision: 'd' for double, 'f' for float. */
int prec;
/* Calling variant: 0 scalar, 'v' vector, 'n' vector declared with __vpcs. */
int vec;
/* Default input interval, used when the user does not supply one. */
double lo;
double hi;
/* The function under test; the active member is implied by prec and vec. */
union
{
double (*d) (double);
float (*f) (float);
v_double (*vd) (v_double);
v_float (*vf) (v_float);
#ifdef __vpcs
__vpcs v_double (*vnd) (v_double);
__vpcs v_float (*vnf) (v_float);
#endif
} fun;
} funtab[] = {
/* Entry builders: the function's own identifier doubles as its name.
Entries written out by hand below are the ones whose name differs from the
wrapper that is actually called. */
#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
#define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
D (dummy, 1.0, 2.0)
D (exp, -9.9, 9.9)
D (exp, 0.5, 1.0)
D (exp2, -9.9, 9.9)
D (log, 0.01, 11.1)
D (log, 0.999, 1.001)
D (log2, 0.01, 11.1)
D (log2, 0.999, 1.001)
{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
D (xpow, 0.01, 11.1)
D (ypow, -9.9, 9.9)
D (erf, -6.0, 6.0)
F (dummyf, 1.0, 2.0)
F (expf, -9.9, 9.9)
F (exp2f, -9.9, 9.9)
F (logf, 0.01, 11.1)
F (log2f, 0.01, 11.1)
{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
F (xpowf, 0.01, 11.1)
F (ypowf, -9.9, 9.9)
{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
F (sinf, 0.1, 0.7)
F (sinf, 0.8, 3.1)
F (sinf, -3.1, 3.1)
F (sinf, 3.3, 33.3)
F (sinf, 100, 1000)
F (sinf, 1e6, 1e32)
F (cosf, 0.1, 0.7)
F (cosf, 0.8, 3.1)
F (cosf, -3.1, 3.1)
F (cosf, 3.3, 33.3)
F (cosf, 100, 1000)
F (cosf, 1e6, 1e32)
F (erff, -4.0, 4.0)
#if WANT_VMATH
D (__s_sin, -3.1, 3.1)
D (__s_cos, -3.1, 3.1)
D (__s_exp, -9.9, 9.9)
D (__s_log, 0.01, 11.1)
{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
F (__s_expf, -9.9, 9.9)
F (__s_expf_1u, -9.9, 9.9)
F (__s_exp2f, -9.9, 9.9)
F (__s_exp2f_1u, -9.9, 9.9)
F (__s_logf, 0.01, 11.1)
{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
F (__s_sinf, -3.1, 3.1)
F (__s_cosf, -3.1, 3.1)
#if __aarch64__
VD (__v_dummy, 1.0, 2.0)
VD (__v_sin, -3.1, 3.1)
VD (__v_cos, -3.1, 3.1)
VD (__v_exp, -9.9, 9.9)
VD (__v_log, 0.01, 11.1)
{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
VF (__v_dummyf, 1.0, 2.0)
VF (__v_expf, -9.9, 9.9)
VF (__v_expf_1u, -9.9, 9.9)
VF (__v_exp2f, -9.9, 9.9)
VF (__v_exp2f_1u, -9.9, 9.9)
VF (__v_logf, 0.01, 11.1)
{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
VF (__v_sinf, -3.1, 3.1)
VF (__v_cosf, -3.1, 3.1)
#ifdef __vpcs
VND (__vn_dummy, 1.0, 2.0)
VND (__vn_exp, -9.9, 9.9)
VND (_ZGVnN2v_exp, -9.9, 9.9)
VND (__vn_log, 0.01, 11.1)
VND (_ZGVnN2v_log, 0.01, 11.1)
{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
VND (__vn_sin, -3.1, 3.1)
VND (_ZGVnN2v_sin, -3.1, 3.1)
VND (__vn_cos, -3.1, 3.1)
VND (_ZGVnN2v_cos, -3.1, 3.1)
VNF (__vn_dummyf, 1.0, 2.0)
VNF (__vn_expf, -9.9, 9.9)
VNF (_ZGVnN4v_expf, -9.9, 9.9)
VNF (__vn_expf_1u, -9.9, 9.9)
VNF (__vn_exp2f, -9.9, 9.9)
VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
VNF (__vn_exp2f_1u, -9.9, 9.9)
VNF (__vn_logf, 0.01, 11.1)
VNF (_ZGVnN4v_logf, 0.01, 11.1)
{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
VNF (__vn_sinf, -3.1, 3.1)
VNF (_ZGVnN4v_sinf, -3.1, 3.1)
VNF (__vn_cosf, -3.1, 3.1)
VNF (_ZGVnN4v_cosf, -3.1, 3.1)
#endif
#endif
#endif
/* Sentinel entry with a null name. */
{0},
#undef F
#undef D
#undef VF
#undef VD
#undef VNF
#undef VND
};
/* Fill A with N evenly spaced values starting at LO; element i is
(lo * (N - i) + hi * i) / N, so HI itself is not included. */
static void
gen_linear (double lo, double hi)
{
for (int i = 0; i < N; i++)
A[i] = (lo * (N - i) + hi * i) / N;
}
/* Single-precision counterpart filling Af. The cast applies to the
numerator only, so the division by N is carried out in float. */
static void
genf_linear (double lo, double hi)
{
for (int i = 0; i < N; i++)
Af[i] = (float)(lo * (N - i) + hi * i) / N;
}
/* Reinterpret the 64-bit pattern I as a double, bit for bit, without any
   numeric conversion.  */
static inline double
asdouble (uint64_t i)
{
  double f;

  memcpy (&f, &i, sizeof f);
  return f;
}
/* State of the pseudo-random generator used by frand. The fixed initial
value makes every run produce the same sequence. */
static uint64_t seed = 0x0123456789abcdef;
/* Return a pseudo-random double in [LO, HI) for LO < HI and advance the
generator state. */
static double
frand (double lo, double hi)
{
/* One 64-bit linear congruential step. */
seed = 6364136223846793005ULL * seed + 1;
/* The top 52 bits of seed become the mantissa of a double whose exponent
field is 0x3ff, giving a value in [1, 2); subtracting 1 maps it to [0, 1). */
return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0);
}
/* Fill A with N values drawn from frand (lo, hi). */
static void
gen_rand (double lo, double hi)
{
for (int i = 0; i < N; i++)
A[i] = frand (lo, hi);
}
/* Fill Af with N values drawn from frand (lo, hi), converted to float. */
static void
genf_rand (double lo, double hi)
{
for (int i = 0; i < N; i++)
Af[i] = (float)frand (lo, hi);
}
/* Copy N consecutive trace samples starting at Trace[index] into A.
The caller must ensure that Trace holds at least index + N samples. */
static void
gen_trace (int index)
{
for (int i = 0; i < N; i++)
A[i] = Trace[index + i];
}
/* Same as gen_trace, but converts the samples to float and fills Af. */
static void
genf_trace (int index)
{
for (int i = 0; i < N; i++)
Af[i] = (float)Trace[index + i];
}
/* Throughput kernel: call F on every element of A and discard the results,
so successive calls do not depend on one another. */
static void
run_thruput (double f (double))
{
for (int i = 0; i < N; i++)
f (A[i]);
}
/* Single-precision counterpart of run_thruput, reading Af. */
static void
runf_thruput (float f (float))
{
for (int i = 0; i < N; i++)
f (Af[i]);
}
/* Read through a volatile object so that the compiler cannot treat the
multiplier used by the latency kernels as a known constant. */
volatile double zero = 0;
/* Latency kernel: each argument is A[i] + prev * z, where z is loaded from
`zero'. For a finite previous result the added term is zero, so the input
value is unchanged, yet every call depends on the result of the one before
it. */
static void
run_latency (double f (double))
{
double z = zero;
double prev = z;
for (int i = 0; i < N; i++)
prev = f (A[i] + prev * z);
}
/* Single-precision counterpart of run_latency, reading Af. */
static void
runf_latency (float f (float))
{
float z = (float)zero;
float prev = z;
for (int i = 0; i < N; i++)
prev = f (Af[i] + prev * z);
}
/* Vector versions of the throughput and latency kernels. They step through
the input arrays v_double_len () or v_float_len () elements at a time and
otherwise follow the scalar kernels: results are discarded for throughput,
and chained through prev * z (z taken from `zero') for latency. */
static void
run_v_thruput (v_double f (v_double))
{
for (int i = 0; i < N; i += v_double_len ())
f (v_double_load (A+i));
}
static void
runf_v_thruput (v_float f (v_float))
{
for (int i = 0; i < N; i += v_float_len ())
f (v_float_load (Af+i));
}
static void
run_v_latency (v_double f (v_double))
{
v_double z = v_double_dup (zero);
v_double prev = z;
for (int i = 0; i < N; i += v_double_len ())
prev = f (v_double_load (A+i) + prev * z);
}
static void
runf_v_latency (v_float f (v_float))
{
v_float z = v_float_dup (zero);
v_float prev = z;
for (int i = 0; i < N; i += v_float_len ())
prev = f (v_float_load (Af+i) + prev * z);
}
#ifdef __vpcs
/* The same four kernels for functions declared with __vpcs; only the type
of the function parameter differs. */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
for (int i = 0; i < N; i += v_double_len ())
f (v_double_load (A+i));
}
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
for (int i = 0; i < N; i += v_float_len ())
f (v_float_load (Af+i));
}
static void
run_vn_latency (__vpcs v_double f (v_double))
{
v_double z = v_double_dup (zero);
v_double prev = z;
for (int i = 0; i < N; i += v_double_len ())
prev = f (v_double_load (A+i) + prev * z);
}
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
v_float z = v_float_dup (zero);
v_float prev = z;
for (int i = 0; i < N; i += v_float_len ())
prev = f (v_float_load (Af+i) + prev * z);
}
#endif
/* Return a timestamp in nanoseconds for measuring intervals.
   CLOCK_MONOTONIC is used instead of CLOCK_REALTIME so that a wall-clock
   adjustment (NTP step, manual date change) in the middle of a measurement
   cannot make the unsigned difference of two timestamps wrap around or
   come out too small.  Only the difference between two return values is
   meaningful.  Aborts the process if the clock cannot be read.  */
static uint64_t
tic (void)
{
  struct timespec ts;

  if (clock_gettime (CLOCK_MONOTONIC, &ts))
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
/* Time RUN (F) and leave the best result in `dt', which must be a uint64_t
variable in the invoking scope. After one untimed warm-up call,
measurecount measurements are taken; each one times itercount consecutive
calls of RUN (F) and the smallest elapsed time in nanoseconds is kept.
dt starts at -1, i.e. the largest uint64_t value, so the first measurement
always replaces it; with measurecount <= 0 it keeps that value. */
#define TIMEIT(run, f) do { \
dt = -1; \
run (f); /* Warm up. */ \
for (int j = 0; j < measurecount; j++) \
{ \
uint64_t t0 = tic (); \
for (int i = 0; i < itercount; i++) \
run (f); \
uint64_t t1 = tic (); \
if (t1 - t0 < dt) \
dt = t1 - t0; \
} \
} while (0)
/* Run one measurement of F on the current contents of the input array and
print one result line. TYPE is 't' for throughput or 'l' for latency; LO
and HI are only echoed in the output. The kernel is selected from the
function's precision (f->prec), the measurement type and the calling
variant (f->vec). */
static void
bench1 (const struct fun *f, int type, double lo, double hi)
{
/* Stays 0 if no branch of the chain below matches. */
uint64_t dt = 0;
/* Time per element or per call in hundredths of a nanosecond. */
uint64_t ns100;
const char *s = type == 't' ? "rthruput" : "latency";
/* Number of elements consumed per call; 1 for scalar functions. */
int vlen = 1;
if (f->vec && f->prec == 'd')
vlen = v_double_len();
else if (f->vec && f->prec == 'f')
vlen = v_float_len();
if (f->prec == 'd' && type == 't' && f->vec == 0)
TIMEIT (run_thruput, f->fun.d);
else if (f->prec == 'd' && type == 'l' && f->vec == 0)
TIMEIT (run_latency, f->fun.d);
else if (f->prec == 'f' && type == 't' && f->vec == 0)
TIMEIT (runf_thruput, f->fun.f);
else if (f->prec == 'f' && type == 'l' && f->vec == 0)
TIMEIT (runf_latency, f->fun.f);
else if (f->prec == 'd' && type == 't' && f->vec == 'v')
TIMEIT (run_v_thruput, f->fun.vd);
else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
TIMEIT (run_v_latency, f->fun.vd);
else if (f->prec == 'f' && type == 't' && f->vec == 'v')
TIMEIT (runf_v_thruput, f->fun.vf);
else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
TIMEIT (runf_v_latency, f->fun.vf);
#ifdef __vpcs
else if (f->prec == 'd' && type == 't' && f->vec == 'n')
TIMEIT (run_vn_thruput, f->fun.vnd);
else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
TIMEIT (run_vn_latency, f->fun.vnd);
else if (f->prec == 'f' && type == 't' && f->vec == 'n')
TIMEIT (runf_vn_thruput, f->fun.vnf);
else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
TIMEIT (runf_vn_latency, f->fun.vnf);
#endif
if (type == 't')
{
/* Throughput is reported per element: dt covers itercount * N elements.
Adding half the divisor rounds to the nearest hundredth. */
ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
(unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
(unsigned long long) dt, lo, hi);
}
else if (type == 'l')
{
/* Latency is reported per call: dt covers itercount * N / vlen calls. */
ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
(unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
(unsigned long long) dt, lo, hi);
}
fflush (stdout);
}
/* Benchmark F once per block of input data.  GEN selects how the input
   array is filled ('r' random, 'l' linear, 't' from the loaded trace) and
   TYPE selects the measurements ('t' throughput, 'l' latency, 'b' both).
   For trace input the first block starts at sample 0 and every further
   block of N samples is benchmarked as well; LO and HI then carry the block
   index and the number of blocks instead of an interval.  */
static void
bench (const struct fun *f, double lo, double hi, int type, int gen)
{
  const int do_thruput = type == 'b' || type == 't';
  const int do_latency = type == 'b' || type == 'l';

  /* Fill the input array that matches the precision of F.  */
  if (f->prec == 'd')
    {
      switch (gen)
        {
        case 'r':
          gen_rand (lo, hi);
          break;
        case 'l':
          gen_linear (lo, hi);
          break;
        case 't':
          gen_trace (0);
          break;
        }
    }
  else if (f->prec == 'f')
    {
      switch (gen)
        {
        case 'r':
          genf_rand (lo, hi);
          break;
        case 'l':
          genf_linear (lo, hi);
          break;
        case 't':
          genf_trace (0);
          break;
        }
    }

  if (gen == 't')
    hi = trace_size / N;

  if (do_thruput)
    bench1 (f, 't', lo, hi);
  if (do_latency)
    bench1 (f, 'l', lo, hi);

  /* Remaining trace blocks; the loop body never runs while trace_size is
     not larger than N.  */
  for (int i = N; i < trace_size; i += N)
    {
      if (f->prec == 'd')
        gen_trace (i);
      else
        genf_trace (i);

      lo = i / N;
      if (do_thruput)
        bench1 (f, 't', lo, hi);
      if (do_latency)
        bench1 (f, 'l', lo, hi);
    }
}
/* Load whitespace-separated floating-point samples from the file NAME, or
   from standard input when NAME is "-", into the Trace array.
   Trace grows in steps of N samples.  After reading, trace_size is a
   multiple of N: if the number of samples read is not already one, the last
   block is padded by repeating samples from the start of the trace.
   Prints a message and exits with status 1 if the file cannot be opened, if
   memory runs out, on a read error, or if no sample could be read.  */
static void
readtrace (const char *name)
{
  int n = 0;
  FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
  if (!f)
    {
      printf ("opening \"%s\" failed: %m\n", name);
      exit (1);
    }
  for (;;)
    {
      /* Make room for another block before storing sample n.  */
      if (n >= trace_size)
        {
          trace_size += N;
          Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
          if (Trace == NULL)
            {
              printf ("out of memory\n");
              exit (1);
            }
        }
      if (fscanf (f, "%lf", Trace + n) != 1)
        break;
      n++;
    }
  if (ferror (f) || n == 0)
    {
      printf ("reading \"%s\" failed: %m\n", name);
      exit (1);
    }
  fclose (f);
  /* An exact multiple of N needs no padding; drop the spare block that was
     allocated before the failed read.  */
  if (n % N == 0)
    trace_size = n;
  /* Pad the final partial block with samples from the beginning.  */
  for (int i = 0; n < trace_size; n++, i++)
    Trace[n] = Trace[i];
}
/* Print the command-line synopsis followed by every benchmark name with its
   default input interval, then exit with status 1.  Does not return.  */
static void
usage (void)
{
  const struct fun *entry = funtab;

  fputs ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
         "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
         "[func2 ..]\n", stdout);
  fputs ("func:\n", stdout);
  printf ("%7s [run all benchmarks]\n", "all");
  while (entry->name)
    {
      printf ("%7s [low: %g high: %g]\n", entry->name, entry->lo, entry->hi);
      entry++;
    }
  exit (1);
}
/* Entry point.  Options are parsed first and must come before the function
   names:
     -i low high   benchmark over [low, high] instead of the table defaults
     -m count      number of measurements (must be positive)
     -c count      iterations per measurement (must be positive)
     -g r|l|t      input generator: rand, linear or trace (first letter used)
     -f file       read a trace from file; implies -g trace
     -t l|t|b      measure latency, thruput or both (first letter used)
   Every remaining argument names a benchmark from funtab, or "all".
   Invalid options print the usage text and exit with status 1.  Returns 0
   otherwise, also when a named function is unknown (a message is
   printed).  */
int
main (int argc, char *argv[])
{
  int usergen = 0, gen = 'r', type = 'b', all = 0;
  double lo = 0, hi = 0;
  const char *tracefile = "-";

  argv++;
  argc--;
  for (;;)
    {
      if (argc <= 0)
        usage ();
      if (argv[0][0] != '-')
        break;
      else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
        {
          usergen = 1;
          lo = strtod (argv[1], 0);
          hi = strtod (argv[2], 0);
          argv += 3;
          argc -= 3;
        }
      else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
        {
          measurecount = strtol (argv[1], 0, 0);
          /* Without at least one measurement no time is ever recorded.  */
          if (measurecount <= 0)
            usage ();
          argv += 2;
          argc -= 2;
        }
      else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
        {
          itercount = strtol (argv[1], 0, 0);
          /* bench1 divides by itercount * N, so zero must be rejected.  */
          if (itercount <= 0)
            usage ();
          argv += 2;
          argc -= 2;
        }
      else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
        {
          gen = argv[1][0];
          /* strchr also matches the terminating null character, so an empty
             argument has to be rejected explicitly.  */
          if (gen == 0 || strchr ("rlt", gen) == 0)
            usage ();
          argv += 2;
          argc -= 2;
        }
      else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
        {
          gen = 't'; /* -f implies -g trace.  */
          tracefile = argv[1];
          argv += 2;
          argc -= 2;
        }
      else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
        {
          type = argv[1][0];
          if (type == 0 || strchr ("ltb", type) == 0)
            usage ();
          argv += 2;
          argc -= 2;
        }
      else
        usage ();
    }
  if (gen == 't')
    {
      readtrace (tracefile);
      /* With a trace the interval is not used for input generation; setting
         usergen keeps the table defaults from overwriting lo and hi.  */
      lo = hi = 0;
      usergen = 1;
    }
  while (argc > 0)
    {
      int found = 0;
      all = strcmp (argv[0], "all") == 0;
      for (const struct fun *f = funtab; f->name; f++)
        if (all || strcmp (argv[0], f->name) == 0)
          {
            found = 1;
            if (!usergen)
              {
                lo = f->lo;
                hi = f->hi;
              }
            bench (f, lo, hi, type, gen);
            /* With a user-supplied interval a name that appears several
               times in the table is benchmarked only once.  */
            if (usergen && !all)
              break;
          }
      if (!found)
        printf ("unknown function: %s\n", argv[0]);
      argv++;
      argc--;
    }
  return 0;
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
/*
* intern.h
*
* Copyright (c) 1999-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef mathtest_intern_h
#define mathtest_intern_h
#include <mpfr.h>
#include <mpc.h>
#include "types.h"
#include "wrappers.h"
/* Generic function pointer. */
typedef void (*funcptr)(void);
/* Pointers to test function types. */
typedef int (*testfunc1)(mpfr_t, mpfr_t, mpfr_rnd_t);
typedef int (*testfunc2)(mpfr_t, mpfr_t, mpfr_t, mpfr_rnd_t);
typedef int (*testrred)(mpfr_t, mpfr_t, int *);
typedef char * (*testsemi1)(uint32 *, uint32 *);
typedef char * (*testsemi2)(uint32 *, uint32 *, uint32 *);
typedef char * (*testsemi2f)(uint32 *, uint32 *, uint32 *);
typedef char * (*testldexp)(uint32 *, uint32 *, uint32 *);
typedef char * (*testfrexp)(uint32 *, uint32 *, uint32 *);
typedef char * (*testmodf)(uint32 *, uint32 *, uint32 *);
typedef char * (*testclassify)(uint32 *, uint32 *);
typedef char * (*testclassifyf)(uint32 *, uint32 *);
typedef int (*testfunc1c)(mpc_t, mpc_t, mpc_rnd_t);
typedef int (*testfunc2c)(mpc_t, mpc_t, mpc_t, mpc_rnd_t);
typedef int (*testfunc1cr)(mpfr_t, mpc_t, mpfr_rnd_t);
/* Pointer to a function that generates random test cases. */
typedef void (*casegen)(uint32 *, uint32, uint32);
/*
* List of testable functions, their types, and their testable range.
*/
enum {
args1, /* afloat-based, one argument */
args1f, /* same as args1 but in single prec */
args2, /* afloat-based, two arguments */
args2f, /* same as args2 but in single prec */
rred, /* afloat-based, one arg, aux return */
rredf, /* same as rred but in single prec */
semi1, /* seminumerical, one argument */
semi1f, /* seminumerical, 1 arg, float */
semi2, /* seminumerical, two arguments */
semi2f, /* seminumerical, 2 args, floats */
t_ldexp, /* dbl * int -> dbl */
t_ldexpf, /* sgl * int -> sgl */
t_frexp, /* dbl -> dbl * int */
t_frexpf, /* sgl -> sgl * int */
t_modf, /* dbl -> dbl * dbl */
t_modff, /* sgl -> sgl * sgl */
classify, /* classify double: dbl -> int */
classifyf, /* classify float: flt -> int */
compare, /* compare doubles, returns int */
comparef, /* compare floats, returns int */
args1c, /* acomplex-based, one argument */
/* The next three entries, as handled by nargs_() and isdouble() in main.c:
 * args2c is double precision and nargs_() returns 4; args1fc is single
 * precision and nargs_() returns 2; args2fc is single precision and
 * nargs_() returns 4. */
args2c,
args1fc,
args2fc,
/* NOTE(review): the testfunc1cr typedef takes an mpc_t and produces an
 * mpfr_t, which suggests these two are complex -> real rather than
 * complex -> complex - confirm. */
args1cr, /* dbl-complex -> complex */
args1fcr /* sgl-complex -> complex */
};
/* Description of one testable function; the `functions' array declared
 * below holds nfunctions of these. */
typedef struct __testable Testable;
struct __testable {
char *name; /* name matched against the input by find_function() */
funcptr func; /* generic pointer to the function itself */
int type; /* one of the function-type enumerators above */
wrapperfunc wrappers[MAXWRAPPERS]; /* NOTE(review): wrapperfunc and MAXWRAPPERS come from wrappers.h - semantics not visible here */
casegen cases; /* complex functions use the same casegen for both real and complex args */
uint32 caseparam1, caseparam2; /* NOTE(review): presumably the two uint32 parameters handed to `cases' - confirm */
};
extern Testable functions[];
extern const int nfunctions;
extern void init_pi(void);
int nargs_(Testable* f);
#endif

View file

@ -0,0 +1,334 @@
/*
* main.c
*
* Copyright (c) 1999-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>
#include "intern.h"
void gencases(Testable *fn, int number);
void docase(Testable *fn, uint32 *args);
void vet_for_decline(Testable *fn, uint32 *args, uint32 *result, int got_errno_in);
void seed_random(uint32 seed);
int check_declines = 0;
int lib_fo = 0;
int lib_no_arith = 0;
int ntests = 0;
/*
 * Return the argument count associated with the function type of F:
 * 4 for the two-argument complex types, 2 for the types listed in the
 * second group below, and 1 for everything else.
 */
int nargs_(Testable* f) {
    int count = 1;

    switch (f->type) {
      case args2c:
      case args2fc:
        count = 4;
        break;
      case args2:
      case args2f:
      case semi2:
      case semi2f:
      case t_ldexp:
      case t_ldexpf:
      case args1c:
      case args1fc:
      case args1cr:
      case args1fcr:
      case compare:
      case comparef:
        count = 2;
        break;
      default:
        break;
    }
    return count;
}
/*
 * Return 1 if the function type of F works on double precision values and
 * 0 if it works on single precision values.
 *
 * A type outside the list is a programming error. It trips the assertion;
 * when assertions are compiled out (NDEBUG) the process is aborted instead
 * of running off the end of a non-void function.
 */
static int isdouble(Testable *f)
{
    switch (f->type) {
      case args1:
      case rred:
      case semi1:
      case t_frexp:
      case t_modf:
      case classify:
      case t_ldexp:
      case args2:
      case semi2:
      case args1c:
      case args1cr:
      case compare:
      case args2c:
        return 1;
      case args1f:
      case rredf:
      case semi1f:
      case t_frexpf:
      case t_modff:
      case classifyf:
      case args2f:
      case semi2f:
      case t_ldexpf:
      case comparef:
      case args1fc:
      case args1fcr:
      case args2fc:
        return 0;
      default:
        assert(0 && "Bad function type");
        abort();
    }
}
Testable *find_function(const char *func)
{
int i;
for (i = 0; i < nfunctions; i++) {
if (func && !strcmp(func, functions[i].name)) {
return &functions[i];
}
}
return NULL;
}
/*
 * Parse the operand text STR into the two 32-bit words *WORD0 and *WORD1.
 *
 * STR is either one of the symbolic names in the table below, optionally
 * preceded by '+' or '-', or two hexadecimal numbers separated by a dot.
 * For a symbolic name, F (which must not be NULL) decides whether the
 * double precision pair or the single precision word is stored; in the
 * single precision case *WORD1 is set to 0. A leading '-' sets the top bit
 * of *WORD0. For the hexadecimal form only the words that sscanf manages
 * to convert are written.
 */
void get_operand(const char *str, Testable *f, uint32 *word0, uint32 *word1)
{
    struct special {
        unsigned dblword0, dblword1, sglword;
        const char *name;
    } specials[] = {
        {0x00000000,0x00000000,0x00000000,"0"},
        {0x3FF00000,0x00000000,0x3f800000,"1"},
        {0x7FF00000,0x00000000,0x7f800000,"inf"},
        {0x7FF80000,0x00000001,0x7fc00000,"qnan"},
        {0x7FF00000,0x00000001,0x7f800001,"snan"},
        {0x3ff921fb,0x54442d18,0x3fc90fdb,"pi2"},
        {0x400921fb,0x54442d18,0x40490fdb,"pi"},
        {0x3fe921fb,0x54442d18,0x3f490fdb,"pi4"},
        {0x4002d97c,0x7f3321d2,0x4016cbe4,"3pi4"},
    };
    const int nspecials = (int)(sizeof(specials)/sizeof(*specials));
    const int negative = (str[0] == '-');
    /* Text after an explicit sign, or NULL when STR carries no sign. */
    const char *unsigned_part = (negative || str[0] == '+') ? str + 1 : NULL;
    int idx;

    for (idx = 0; idx < nspecials; idx++) {
        const struct special *sp = &specials[idx];
        int matches = !strcmp(str, sp->name);

        if (!matches && unsigned_part)
            matches = !strcmp(unsigned_part, sp->name);
        if (!matches)
            continue;

        assert(f);
        if (isdouble(f)) {
            *word0 = sp->dblword0;
            *word1 = sp->dblword1;
        } else {
            *word0 = sp->sglword;
            *word1 = 0;
        }
        if (negative)
            *word0 |= 0x80000000U;
        return;
    }

    /* Not a symbolic name: raw hexadecimal words. */
    sscanf(str, "%"I32"x.%"I32"x", word0, word1);
}
/*
 * dofile: read a test script from `fp` one line at a time and act on it.
 *
 * fp          - input stream; read with fgets until it returns NULL.
 * translating - when nonzero, comment and blank lines are echoed to
 *               stdout; when it is 2 or more, lines that already carry
 *               a result are regenerated instead of being copied through.
 *
 * Command syntax is:
 *
 *  - "seed <integer>" sets a random seed
 *
 *  - "test <function> <ntests>" generates random test lines
 *
 *  - "<function> op1=foo [op2=bar]" generates a specific test
 *  - "func=<function> op1=foo [op2=bar]" does the same
 *  - "func=<function> op1=foo result=bar" will just output the line as-is
 *
 *  - a semicolon or a blank line is ignored
 *
 * Lines longer than the 1024-byte buffer are processed in pieces, as
 * fgets delivers them.
 */
void dofile(FILE *fp, int translating) {
    char buf[1024], sparebuf[1024], *p;

    while (fgets(buf, sizeof(buf), fp)) {
        buf[strcspn(buf, "\r\n")] = '\0';
        /* strtok below modifies buf, so keep a pristine copy for echoing. */
        strcpy(sparebuf, buf);
        p = buf;
        /* The <ctype.h> classifiers need an unsigned char value; passing a
         * negative plain char is undefined behaviour. */
        while (*p && isspace((unsigned char)*p)) p++;
        if (!*p || *p == ';') {
            /* Comment or blank line. Only print if `translating' is set. */
            if (translating)
                printf("%s\n", buf);
            continue;
        }
        if (!strncmp(buf, "seed ", 5)) {
            seed_random(atoi(buf+5));
        } else if (!strncmp(buf, "random=", 7)) {
            /*
             * Copy 'random=on' / 'random=off' lines unconditionally
             * to the output, so that random test failures can be
             * accumulated into a recent-failures-list file and
             * still identified as random-in-origin when re-run the
             * next day.
             */
            printf("%s\n", buf);
        } else if (!strncmp(buf, "test ", 5)) {
            char *name = buf+5;   /* function name, NUL-terminated below */
            char *arg = name;     /* advanced to the optional count */
            int count, i;         /* `count' avoids shadowing the global ntests */

            while (*arg && !isspace((unsigned char)*arg)) arg++;
            if (*arg) *arg++ = '\0';
            while (*arg && isspace((unsigned char)*arg)) arg++;
            if (*arg)
                count = atoi(arg);
            else
                count = 100;      /* *shrug* */
            for (i = 0; i < nfunctions; i++) {
                if (!strcmp(name, functions[i].name)) {
                    gencases(&functions[i], count);
                    break;
                }
            }
            if (i == nfunctions) {
                fprintf(stderr, "unknown test `%s'\n", name);
            }
        } else {
            /*
             * Parse a specific test line.
             */
            /* Zero-initialised so that a line which supplies a result but
             * not every operand never hands indeterminate words to
             * vet_for_decline. */
            uint32 ops[8] = {0}, result[8] = {0};
            int got_op = 0; /* &1 for got_op1, &4 for got_op3 etc. */
            Testable *f = 0;
            char *q, *r;
            int got_result = 0, got_errno_in = 0;

            for (q = strtok(p, " \t"); q; q = strtok(NULL, " \t")) {
                r = strchr(q, '=');
                if (!r) {
                    /* A bare word names the function under test. */
                    f = find_function(q);
                } else {
                    *r++ = '\0';   /* split "key=value" in place */
                    if (!strcmp(q, "func"))
                        f = find_function(r);
                    else if (!strcmp(q, "op1") || !strcmp(q, "op1r")) {
                        get_operand(r, f, &ops[0], &ops[1]);
                        got_op |= 1;
                    } else if (!strcmp(q, "op2") || !strcmp(q, "op1i")) {
                        get_operand(r, f, &ops[2], &ops[3]);
                        got_op |= 2;
                    } else if (!strcmp(q, "op2r")) {
                        get_operand(r, f, &ops[4], &ops[5]);
                        got_op |= 4;
                    } else if (!strcmp(q, "op2i")) {
                        get_operand(r, f, &ops[6], &ops[7]);
                        got_op |= 8;
                    } else if (!strcmp(q, "result") || !strcmp(q, "resultr")) {
                        get_operand(r, f, &result[0], &result[1]);
                        got_result |= 1;
                    } else if (!strcmp(q, "resulti")) {
                        get_operand(r, f, &result[4], &result[5]);
                        got_result |= 2;
                    } else if (!strcmp(q, "res2")) {
                        get_operand(r, f, &result[2], &result[3]);
                        got_result |= 4;
                    } else if (!strcmp(q, "errno_in")) {
                        got_errno_in = 1;
                    }
                }
            }

            /*
             * Test cases already set up by the input are not
             * reprocessed by default, unlike the fplib tests. (This
             * is mostly for historical reasons, because we used to
             * use a very slow and incomplete internal reference
             * implementation; now our ref impl is MPFR/MPC it
             * probably wouldn't be such a bad idea, though we'd still
             * have to make sure all the special cases came out
             * right.) If translating==2 (corresponding to the -T
             * command-line option) then we regenerate everything
             * regardless.
             */
            if (got_result && translating < 2) {
                if (f)
                    vet_for_decline(f, ops, result, got_errno_in);
                puts(sparebuf);
                continue;
            }

            /* Only run the case when exactly the operands the function
             * takes were supplied. */
            if (f && got_op==(1<<nargs_(f))-1) {
                /*
                 * And do it!
                 */
                docase(f, ops);
            }
        }
    }
}
/*
* main: command-line driver.
*
* Arguments are processed left to right. Options are recognised until
* "--" is seen; every other argument is opened as a test script and fed
* to dofile(). If no file argument was given and no error occurred,
* stdin is read instead.
*
* Returns 0 on success, 1 if an unrecognised option was seen or a file
* could not be opened.
*/
int main(int argc, char **argv) {
int errs = 0, opts = 1, files = 0, translating = 0;
unsigned int seed = 1; /* in case no explicit seed provided */
seed_random(seed);
setvbuf(stdout, NULL, _IOLBF, BUFSIZ); /* stops incomplete lines being printed when out of time */
while (--argc) {
FILE *fp;
char *p = *++argv;
if (opts && *p == '-') {
if(*(p+1) == 0) { /* single -, read from stdin */
/* Stops argument processing; stdin is then read by the
* fallback below, but only if no file was processed earlier. */
break;
} else if (!strcmp(p, "-t")) {
translating = 1;
} else if (!strcmp(p, "-T")) {
translating = 2;
} else if (!strcmp(p, "-c")) {
check_declines = 1;
} else if (!strcmp(p, "--")) {
/* Everything after "--" is treated as a filename. */
opts = 0;
} else if (!strcmp(p,"--seed") && argc > 1 && 1==sscanf(*(argv+1),"%u",&seed)) {
/* A "--seed" with a missing or non-numeric value fails this
* condition and falls through to the unrecognised-option branch. */
seed_random(seed);
argv++; /* next in argv is seed value, so skip */
--argc;
} else if (!strcmp(p, "-fo")) {
lib_fo = 1;
} else if (!strcmp(p, "-noarith")) {
lib_no_arith = 1;
} else {
fprintf(stderr,
"rtest: ignoring unrecognised option '%s'\n", p);
errs = 1;
}
} else {
files = 1;
/* Once any error has been recorded, later files are skipped. */
if (!errs) {
fp = fopen(p, "r");
if (fp) {
dofile(fp, translating);
fclose(fp);
} else {
perror(p);
errs = 1;
}
}
}
}
/*
* If no filename arguments, use stdin.
*/
if (!files && !errs) {
dofile(stdin, translating);
}
if (check_declines) {
fprintf(stderr, "Tests expected to run: %d\n", ntests);
fflush(stderr);
}
return errs;
}

View file

@ -0,0 +1,99 @@
/*
* random.c - random number generator for producing mathlib test cases
*
* Copyright (c) 1998-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "types.h"
#include "random.h"
static uint32 seedbuf[55];
static int seedptr;
/*
 * seed_random: reset the generator state from `seed`.
 *
 * Fills all 55 words of seedbuf by repeatedly stepping `seed` through
 * the recurrence below (in uint32 arithmetic) and storing seed-1, then
 * rewinds seedptr to the start of the buffer.
 */
void seed_random(uint32 seed) {
    uint32 *slot;

    seedptr = 0;
    for (slot = seedbuf; slot != seedbuf + 55; slot++) {
        uint32 rem = seed % 44488;
        uint32 quot = seed / 44488;

        seed = rem * 48271 - quot * 3399;
        *slot = seed - 1;
    }
}
/*
 * base_random: produce the next raw 32-bit word.
 *
 * Adds the word 31 places further round the 55-entry circular buffer
 * into the current word, returns the updated word and advances seedptr.
 */
uint32 base_random(void) {
    int cur, tap;

    seedptr %= 55;
    cur = seedptr;
    tap = (cur + 31) % 55;
    seedbuf[cur] += seedbuf[tap];
    seedptr = cur + 1;
    return seedbuf[cur];
}
/*
 * random32: return a 32-bit random word built from two base_random()
 * outputs.
 *
 * The second word is processed pairwise from the outside in (bit 31
 * with bit 0, bit 30 with bit 1, ...): whenever the two bits of a pair
 * differ, both are flipped, which swaps them. The result is then XORed
 * with the first word.
 */
uint32 random32(void) {
    uint32 first = base_random();
    uint32 second = base_random();
    uint32 hi = 0x80000000, lo = 1;

    while (hi > lo) {
        uint32 pair = hi | lo;
        uint32 present = second & pair;

        /* exactly one bit of the pair set: flip both */
        if (present != 0 && present != pair)
            second ^= pair;
        hi >>= 1;
        lo <<= 1;
    }
    return first ^ second;
}
/*
* random_upto: generate a uniformly randomised number in the range
* 0,...,limit. Note that the upper bound is inclusive: the rejection
* loop in random_upto_internal only discards values greater than limit.
*
* random_upto_biased: generate a number in the same range, but with
* the probability skewed by taking an extreme of 8*bias+1 samples from
* the uniform distribution on the same range. As implemented, the
* smallest sample is kept, so the skew is towards the low end. (I
* don't know why bias is given in that curious way - historical
* reasons, I expect.)
*
* For speed, I separate the implementation of random_upto into the
* two stages of (a) generate a bitmask which reduces a 32-bit random
* number to within a factor of two of the right range, (b) repeatedly
* generate numbers in that range until one is small enough. Splitting
* it up like that means that random_upto_biased can do (a) only once
* even when it does (b) lots of times.
*/
/*
 * random_upto_makemask: return the smallest all-ones low-bit mask
 * (of the widths reachable by shifting 16, 8, 4, 2, 1) that still
 * covers every set bit of `limit`.
 */
static uint32 random_upto_makemask(uint32 limit) {
    uint32 mask = 0xFFFFFFFF;
    int shift = 16;

    while (shift > 0) {
        uint32 narrower = mask >> shift;

        /* keep the narrower mask only if it loses no bit of limit */
        if ((limit & narrower) == limit)
            mask = narrower;
        shift >>= 1;
    }
    return mask;
}
/*
 * random_upto_internal: rejection-sample random32() & mask until the
 * value does not exceed `limit`, and return it.
 */
static uint32 random_upto_internal(uint32 limit, uint32 mask) {
    for (;;) {
        uint32 candidate = random32() & mask;

        if (candidate <= limit)
            return candidate;
    }
}
/*
 * random_upto: return a random value that does not exceed `limit`,
 * using a mask derived from `limit` to keep the rejection rate low.
 */
uint32 random_upto(uint32 limit) {
    return random_upto_internal(limit, random_upto_makemask(limit));
}
/*
 * random_upto_biased: draw 8*bias+1 values not exceeding `limit` and
 * return the smallest of them.
 */
uint32 random_upto_biased(uint32 limit, int bias) {
    uint32 mask = random_upto_makemask(limit);
    uint32 best = random_upto_internal(limit, mask);

    while (bias-- != 0) {
        int round;

        /* eight further samples per unit of bias */
        for (round = 0; round < 8; round++) {
            uint32 sample = random_upto_internal(limit, mask);

            if (sample < best)
                best = sample;
        }
    }
    return best;
}

View file

@ -0,0 +1,12 @@
/*
* random.h - header for random.c
*
* Copyright (c) 2009-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef mathtest_random_h
#define mathtest_random_h

#include "types.h"

/* Return a 32-bit random word. */
uint32 random32(void);
/* Return a random value that does not exceed `limit`. */
uint32 random_upto(uint32 limit);
/* As random_upto, but the result is the extreme of 8*bias+1 samples. */
uint32 random_upto_biased(uint32 limit, int bias);

#endif

View file

@ -0,0 +1,905 @@
/*
* semi.c: test implementations of mathlib seminumerical functions
*
* Copyright (c) 1999-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <stdio.h>
#include "semi.h"
/*
* test_rint: shared worker that rounds a double to an integer value.
*
* in      - input double as two words: in[0] high word (sign, exponent,
*           top of mantissa), in[1] low word.
* out     - receives the result in the same two-word layout.
* isfloor - nonzero to round towards minus infinity.
* isceil  - nonzero to round towards plus infinity.
*
* With both flags zero the fraction is simply discarded (truncation
* towards zero).
*/
static void test_rint(uint32 *in, uint32 *out,
int isfloor, int isceil) {
int sign = in[0] & 0x80000000;
/* The magnitude must grow when flooring a negative or ceiling a positive. */
int roundup = (isfloor && sign) || (isceil && !sign);
uint32 xh, xl, roundword;
int ex = (in[0] >> 20) & 0x7FF; /* exponent */
int i;
if ((ex > 0x3ff + 52 - 1) || /* things this big can't be fractional */
((in[0] & 0x7FFFFFFF) == 0 && in[1] == 0)) { /* zero */
/* NaN, Inf, a large integer, or zero: just return the input */
out[0] = in[0];
out[1] = in[1];
return;
}
/*
* Special case: ex < 0x3ff, ie the magnitude of our number is in
* (0,1). Return 1 or 0 (with the input's sign) according to roundup.
*/
if (ex < 0x3ff) {
out[0] = sign | (roundup ? 0x3FF00000 : 0);
out[1] = 0;
return;
}
/*
* We're not short of time here, so we'll do this the hideously
* inefficient way. Shift bit by bit so that the units place is
* somewhere predictable, round, and shift back again.
*/
xh = in[0];
xl = in[1];
roundword = 0;
/* Shift the whole word pair right until the units bit is bit 0 of xl;
* the discarded fraction bits accumulate in roundword. */
for (i = ex; i < 0x3ff + 52; i++) {
if (roundword & 1)
roundword |= 2; /* preserve sticky bit */
roundword = (roundword >> 1) | ((xl & 1) << 31);
xl = (xl >> 1) | ((xh & 1) << 31);
xh = xh >> 1;
}
/* Any nonzero fraction bumps the integer; the exponent field sits
* directly above the mantissa, so a mantissa carry increments it. */
if (roundword && roundup) {
xl++;
xh += (xl==0);
}
/* Undo the shift, leaving zeros where the fraction bits were. */
for (i = ex; i < 0x3ff + 52; i++) {
xh = (xh << 1) | ((xl >> 31) & 1);
xl = (xl & 0x7FFFFFFF) << 1;
}
out[0] = xh;
out[1] = xl;
}
/*
 * test_ceil: round the double in `in` towards plus infinity, writing
 * the two-word result to `out`. Always returns NULL (no status).
 */
char *test_ceil(uint32 *in, uint32 *out) {
    const int isfloor = 0, isceil = 1;

    test_rint(in, out, isfloor, isceil);
    return NULL;
}
/*
 * test_floor: round the double in `in` towards minus infinity, writing
 * the two-word result to `out`. Always returns NULL (no status).
 */
char *test_floor(uint32 *in, uint32 *out) {
    const int isfloor = 1, isceil = 0;

    test_rint(in, out, isfloor, isceil);
    return NULL;
}
/*
* test_rintf: single-precision counterpart of test_rint.
*
* in      - input float as one 32-bit word.
* out     - receives the rounded result as one 32-bit word.
* isfloor - nonzero to round towards minus infinity.
* isceil  - nonzero to round towards plus infinity.
*
* With both flags zero the fraction is simply discarded (truncation
* towards zero).
*/
static void test_rintf(uint32 *in, uint32 *out,
int isfloor, int isceil) {
int sign = *in & 0x80000000;
/* The magnitude must grow when flooring a negative or ceiling a positive. */
int roundup = (isfloor && sign) || (isceil && !sign);
uint32 x, roundword;
int ex = (*in >> 23) & 0xFF; /* exponent */
int i;
if ((ex > 0x7f + 23 - 1) || /* things this big can't be fractional */
(*in & 0x7FFFFFFF) == 0) { /* zero */
/* NaN, Inf, a large integer, or zero: just return the input */
*out = *in;
return;
}
/*
* Special case: ex < 0x7f, ie the magnitude of our number is in
* (0,1). Return 1 or 0 (with the input's sign) according to roundup.
*/
if (ex < 0x7f) {
*out = sign | (roundup ? 0x3F800000 : 0);
return;
}
/*
* We're not short of time here, so we'll do this the hideously
* inefficient way. Shift bit by bit so that the units place is
* somewhere predictable, round, and shift back again.
*/
x = *in;
roundword = 0;
/* Shift right until the units bit is bit 0; the discarded fraction
* bits accumulate in roundword. */
for (i = ex; i < 0x7F + 23; i++) {
if (roundword & 1)
roundword |= 2; /* preserve sticky bit */
roundword = (roundword >> 1) | ((x & 1) << 31);
x = x >> 1;
}
/* Any nonzero fraction bumps the integer; a mantissa carry runs
* straight into the exponent field above it. */
if (roundword && roundup) {
x++;
}
/* Undo the shift, leaving zeros where the fraction bits were. */
for (i = ex; i < 0x7F + 23; i++) {
x = x << 1;
}
*out = x;
}
/*
 * test_ceilf: round the float in `*in` towards plus infinity, writing
 * the result to `*out`. Always returns NULL (no status).
 */
char *test_ceilf(uint32 *in, uint32 *out) {
    const int isfloor = 0, isceil = 1;

    test_rintf(in, out, isfloor, isceil);
    return NULL;
}
/*
 * test_floorf: round the float in `*in` towards minus infinity, writing
 * the result to `*out`. Always returns NULL (no status).
 */
char *test_floorf(uint32 *in, uint32 *out) {
    const int isfloor = 1, isceil = 0;

    test_rintf(in, out, isfloor, isceil);
    return NULL;
}
/*
* test_fmod: reference double-precision fmod(a, b).
*
* a, b - operands as two words each ([0] high word, [1] low word).
* out  - receives the two-word result.
*
* Returns a status string ("i" or "EDOM status=i") for the special
* cases handled below, otherwise NULL.
*/
char *test_fmod(uint32 *a, uint32 *b, uint32 *out) {
int sign;
int32 aex, bex;
uint32 am[2], bm[2];
/* Doubling the sign-stripped high word and adding 1 for a nonzero low
* word makes every NaN compare greater than 0xFFE00000 (infinity). */
if (((a[0] & 0x7FFFFFFF) << 1) + !!a[1] > 0xFFE00000 ||
((b[0] & 0x7FFFFFFF) << 1) + !!b[1] > 0xFFE00000) {
/* a or b is NaN: return QNaN, optionally with IVO */
uint32 an, bn;
out[0] = 0x7ff80000;
out[1] = 1;
an = ((a[0] & 0x7FFFFFFF) << 1) + !!a[1];
bn = ((b[0] & 0x7FFFFFFF) << 1) + !!b[1];
if ((an > 0xFFE00000 && an < 0xFFF00000) ||
(bn > 0xFFE00000 && bn < 0xFFF00000))
return "i"; /* at least one SNaN: IVO */
else
return NULL; /* no SNaNs, but at least 1 QNaN */
}
if ((b[0] & 0x7FFFFFFF) == 0 && b[1] == 0) { /* b==0: EDOM */
out[0] = 0x7ff80000;
out[1] = 1;
return "EDOM status=i";
}
if ((a[0] & 0x7FF00000) == 0x7FF00000) { /* a==Inf: EDOM */
out[0] = 0x7ff80000;
out[1] = 1;
return "EDOM status=i";
}
if ((b[0] & 0x7FF00000) == 0x7FF00000) { /* b==Inf: return a */
out[0] = a[0];
out[1] = a[1];
return NULL;
}
if ((a[0] & 0x7FFFFFFF) == 0 && a[1] == 0) { /* a==0: return a */
out[0] = a[0];
out[1] = a[1];
return NULL;
}
/*
* OK. That's the special cases cleared out of the way. Now we
* have finite (though not necessarily normal) a and b.
*/
sign = a[0] & 0x80000000; /* we discard sign of b */
test_frexp(a, am, (uint32 *)&aex);
test_frexp(b, bm, (uint32 *)&bex);
/* Keep only the mantissa bits and make the leading 1 explicit. */
am[0] &= 0xFFFFF, am[0] |= 0x100000;
bm[0] &= 0xFFFFF, bm[0] |= 0x100000;
/* Shift-and-subtract long division, one bit of exponent difference per
* iteration, keeping only the remainder. */
while (aex >= bex) {
if (am[0] > bm[0] || (am[0] == bm[0] && am[1] >= bm[1])) {
am[1] -= bm[1];
/* The low-word subtraction borrowed iff the new am[1] exceeds ~bm[1]. */
am[0] = am[0] - bm[0] - (am[1] > ~bm[1]);
}
if (aex > bex) {
am[0] = (am[0] << 1) | ((am[1] & 0x80000000) >> 31);
am[1] <<= 1;
aex--;
} else
break;
}
/*
* Renormalise final result; this can be cunningly done by
* passing a denormal to ldexp.
*/
aex += 0x3fd;
am[0] |= sign;
test_ldexp(am, (uint32 *)&aex, out);
/* The status string returned by test_ldexp is discarded here. */
return NULL; /* FIXME */
}
/*
* test_fmodf: reference single-precision fmodf(a, b).
*
* a, b - operands, one 32-bit word each.
* out  - receives the one-word result.
*
* Returns a status string ("i" or "EDOM status=i") for the special
* cases handled below, otherwise NULL.
*/
char *test_fmodf(uint32 *a, uint32 *b, uint32 *out) {
int sign;
int32 aex, bex;
uint32 am, bm;
if ((*a & 0x7FFFFFFF) > 0x7F800000 ||
(*b & 0x7FFFFFFF) > 0x7F800000) {
/* a or b is NaN: return QNaN, optionally with IVO */
uint32 an, bn;
*out = 0x7fc00001;
an = *a & 0x7FFFFFFF;
bn = *b & 0x7FFFFFFF;
if ((an > 0x7f800000 && an < 0x7fc00000) ||
(bn > 0x7f800000 && bn < 0x7fc00000))
return "i"; /* at least one SNaN: IVO */
else
return NULL; /* no SNaNs, but at least 1 QNaN */
}
if ((*b & 0x7FFFFFFF) == 0) { /* b==0: EDOM */
*out = 0x7fc00001;
return "EDOM status=i";
}
if ((*a & 0x7F800000) == 0x7F800000) { /* a==Inf: EDOM */
*out = 0x7fc00001;
return "EDOM status=i";
}
if ((*b & 0x7F800000) == 0x7F800000) { /* b==Inf: return a */
*out = *a;
return NULL;
}
if ((*a & 0x7FFFFFFF) == 0) { /* a==0: return a */
*out = *a;
return NULL;
}
/*
* OK. That's the special cases cleared out of the way. Now we
* have finite (though not necessarily normal) a and b.
*/
sign = a[0] & 0x80000000; /* we discard sign of b */
test_frexpf(a, &am, (uint32 *)&aex);
test_frexpf(b, &bm, (uint32 *)&bex);
/* Keep only the mantissa bits and make the leading 1 explicit. */
am &= 0x7FFFFF, am |= 0x800000;
bm &= 0x7FFFFF, bm |= 0x800000;
/* Shift-and-subtract long division, one bit of exponent difference per
* iteration, keeping only the remainder. */
while (aex >= bex) {
if (am >= bm) {
am -= bm;
}
if (aex > bex) {
am <<= 1;
aex--;
} else
break;
}
/*
* Renormalise final result; this can be cunningly done by
* passing a denormal to ldexp.
*/
aex += 0x7d;
am |= sign;
test_ldexpf(&am, (uint32 *)&aex, out);
/* The status string returned by test_ldexpf is discarded here. */
return NULL; /* FIXME */
}
/*
 * test_ldexp: reference double-precision ldexp(x, n).
 *
 * x   - input double as two words ([0] high word, [1] low word).
 * np  - points at the exponent adjustment n, stored in a uint32.
 * out - receives the two-word result.
 *
 * Returns "overflow" or "underflow" when the result is replaced by a
 * signed infinity or zero, "u" when a denormal result is inexact, and
 * NULL otherwise.
 */
char *test_ldexp(uint32 *x, uint32 *np, uint32 *out) {
    int n = *np;
    int32 n2;
    uint32 y[2];
    int ex = (x[0] >> 20) & 0x7FF;         /* exponent */
    uint32 sign = x[0] & 0x80000000;

    if (ex == 0x7FF) {                     /* inf/NaN; just return x */
        out[0] = x[0];
        out[1] = x[1];
        return NULL;
    }
    if ((x[0] & 0x7FFFFFFF) == 0 && x[1] == 0) { /* zero: return x */
        out[0] = x[0];
        out[1] = x[1];
        return NULL;
    }

    test_frexp(x, y, (uint32 *)&n2);

    /*
     * Clamp n before adding so that n + n2 cannot overflow a signed int.
     * n2 lies within a few thousand of zero, so anything beyond the clamp
     * is a definite overflow or underflow either way.
     */
    if (n > 0x4000)
        n = 0x4000;
    else if (n < -0x4000)
        n = -0x4000;
    ex = n + n2;

    if (ex > 0x400) {                      /* overflow */
        out[0] = sign | 0x7FF00000;
        out[1] = 0;
        return "overflow";
    }
    /*
     * Underflow. 2^-1074 is 00000000.00000001; so if ex == -1074
     * then we have something [2^-1075,2^-1074). Under round-to-
     * nearest-even, this whole interval rounds up to 2^-1074,
     * except for the bottom endpoint which rounds to even and is
     * an underflow condition.
     *
     * So, ex < -1074 is definite underflow, and ex == -1074 is
     * underflow iff all mantissa bits are zero.
     */
    if (ex < -1074 || (ex == -1074 && (y[0] & 0xFFFFF) == 0 && y[1] == 0)) {
        out[0] = sign;                     /* underflow: correctly signed zero */
        out[1] = 0;
        return "underflow";
    }

    /*
     * No overflow or underflow; should be nice and simple, unless
     * we have to denormalise and round the result.
     */
    if (ex < -1021) {                      /* denormalise and round */
        uint32 roundword;
        y[0] &= 0x000FFFFF;
        y[0] |= 0x00100000;                /* set leading bit */
        roundword = 0;
        while (ex < -1021) {
            if (roundword & 1)
                roundword |= 2;            /* preserve sticky bit */
            roundword = (roundword >> 1) | ((y[1] & 1) << 31);
            y[1] = (y[1] >> 1) | ((y[0] & 1) << 31);
            y[0] = y[0] >> 1;
            ex++;
        }
        if (roundword > 0x80000000 ||      /* round up */
            (roundword == 0x80000000 && (y[1] & 1))) { /* round up to even */
            y[1]++;
            y[0] += (y[1] == 0);
        }
        out[0] = sign | y[0];
        out[1] = y[1];
        /* Proper ERANGE underflow was handled earlier, but we still
         * expect an IEEE Underflow exception if this partially
         * underflowed result is not exact. */
        if (roundword)
            return "u";
        return NULL;                       /* underflow was handled earlier */
    } else {
        /* ex may be negative here; shift it as an unsigned value, since
         * left-shifting a negative int is undefined. The modular sum is
         * the intended exponent-field adjustment. */
        out[0] = y[0] + ((uint32)ex << 20);
        out[1] = y[1];
        return NULL;
    }
}
/*
 * test_ldexpf: reference single-precision ldexpf(x, n).
 *
 * x   - input float as one 32-bit word.
 * np  - points at the exponent adjustment n, stored in a uint32.
 * out - receives the one-word result.
 *
 * Returns "overflow" or "underflow" when the result is replaced by a
 * signed infinity or zero, "u" when a denormal result is inexact, and
 * NULL otherwise.
 */
char *test_ldexpf(uint32 *x, uint32 *np, uint32 *out) {
    int n = *np;
    int32 n2;
    uint32 y;
    int ex = (*x >> 23) & 0xFF;            /* exponent */
    uint32 sign = *x & 0x80000000;

    if (ex == 0xFF) {                      /* inf/NaN; just return x */
        *out = *x;
        return NULL;
    }
    if ((*x & 0x7FFFFFFF) == 0) {          /* zero: return x */
        *out = *x;
        return NULL;
    }

    test_frexpf(x, &y, (uint32 *)&n2);

    /*
     * Clamp n before adding so that n + n2 cannot overflow a signed int.
     * n2 lies within a few hundred of zero, so anything beyond the clamp
     * is a definite overflow or underflow either way.
     */
    if (n > 0x4000)
        n = 0x4000;
    else if (n < -0x4000)
        n = -0x4000;
    ex = n + n2;

    if (ex > 0x80) {                       /* overflow */
        *out = sign | 0x7F800000;
        return "overflow";
    }
    /*
     * Underflow. 2^-149 is 00000001; so if ex == -149 then we have
     * something [2^-150,2^-149). Under round-to- nearest-even,
     * this whole interval rounds up to 2^-149, except for the
     * bottom endpoint which rounds to even and is an underflow
     * condition.
     *
     * So, ex < -149 is definite underflow, and ex == -149 is
     * underflow iff all mantissa bits are zero.
     */
    if (ex < -149 || (ex == -149 && (y & 0x7FFFFF) == 0)) {
        *out = sign;                       /* underflow: correctly signed zero */
        return "underflow";
    }

    /*
     * No overflow or underflow; should be nice and simple, unless
     * we have to denormalise and round the result.
     */
    if (ex < -125) {                       /* denormalise and round */
        uint32 roundword;
        y &= 0x007FFFFF;
        y |= 0x00800000;                   /* set leading bit */
        roundword = 0;
        while (ex < -125) {
            if (roundword & 1)
                roundword |= 2;            /* preserve sticky bit */
            roundword = (roundword >> 1) | ((y & 1) << 31);
            y = y >> 1;
            ex++;
        }
        if (roundword > 0x80000000 ||      /* round up */
            (roundword == 0x80000000 && (y & 1))) { /* round up to even */
            y++;
        }
        *out = sign | y;
        /* Proper ERANGE underflow was handled earlier, but we still
         * expect an IEEE Underflow exception if this partially
         * underflowed result is not exact. */
        if (roundword)
            return "u";
        return NULL;                       /* underflow was handled earlier */
    } else {
        /* ex may be negative here; shift it as an unsigned value, since
         * left-shifting a negative int is undefined. The modular sum is
         * the intended exponent-field adjustment. */
        *out = y + ((uint32)ex << 23);
        return NULL;
    }
}
/*
* test_frexp: reference double-precision frexp.
*
* x    - input double as two words ([0] high word, [1] low word).
* out  - receives the mantissa: the input's sign and fraction bits with
*        the exponent field forced to 0x3FE.
* nout - receives the exponent as a signed value stored in a uint32.
*
* Infinities, NaNs and zeros are returned unchanged with exponent 0.
* Always returns NULL.
*/
char *test_frexp(uint32 *x, uint32 *out, uint32 *nout) {
int ex = (x[0] >> 20) & 0x7FF; /* exponent */
if (ex == 0x7FF) { /* inf/NaN; return x/0 */
out[0] = x[0];
out[1] = x[1];
nout[0] = 0;
return NULL;
}
if (ex == 0) { /* denormals/zeros */
int sign;
uint32 xh, xl;
if ((x[0] & 0x7FFFFFFF) == 0 && x[1] == 0) {
/* zero: return x/0 */
out[0] = x[0];
out[1] = x[1];
nout[0] = 0;
return NULL;
}
sign = x[0] & 0x80000000;
xh = x[0] & 0x7FFFFFFF;
xl = x[1];
ex = 1;
/* Normalise: shift left until the leading 1 reaches bit 20 of the
* high word, lowering the exponent by one per shift. */
while (!(xh & 0x100000)) {
ex--;
xh = (xh << 1) | ((xl >> 31) & 1);
xl = (xl & 0x7FFFFFFF) << 1;
}
out[0] = sign | 0x3FE00000 | (xh & 0xFFFFF);
out[1] = xl;
nout[0] = ex - 0x3FE;
return NULL;
}
/* Normal number: keep sign and fraction, replace the exponent field. */
out[0] = 0x3FE00000 | (x[0] & 0x800FFFFF);
out[1] = x[1];
nout[0] = ex - 0x3FE;
return NULL; /* ordinary number; no error */
}
/*
* test_frexpf: reference single-precision frexpf.
*
* x    - input float as one 32-bit word.
* out  - receives the mantissa: the input's sign and fraction bits with
*        the exponent field forced to 0x7E.
* nout - receives the exponent as a signed value stored in a uint32.
*
* Infinities, NaNs and zeros are returned unchanged with exponent 0.
* Always returns NULL.
*/
char *test_frexpf(uint32 *x, uint32 *out, uint32 *nout) {
int ex = (*x >> 23) & 0xFF; /* exponent */
if (ex == 0xFF) { /* inf/NaN; return x/0 */
*out = *x;
nout[0] = 0;
return NULL;
}
if (ex == 0) { /* denormals/zeros */
int sign;
uint32 xv;
if ((*x & 0x7FFFFFFF) == 0) {
/* zero: return x/0 */
*out = *x;
nout[0] = 0;
return NULL;
}
sign = *x & 0x80000000;
xv = *x & 0x7FFFFFFF;
ex = 1;
/* Normalise: shift left until the leading 1 reaches bit 23,
* lowering the exponent by one per shift. */
while (!(xv & 0x800000)) {
ex--;
xv = xv << 1;
}
*out = sign | 0x3F000000 | (xv & 0x7FFFFF);
nout[0] = ex - 0x7E;
return NULL;
}
/* Normal number: keep sign and fraction, replace the exponent field. */
*out = 0x3F000000 | (*x & 0x807FFFFF);
nout[0] = ex - 0x7E;
return NULL; /* ordinary number; no error */
}
/*
* test_modf: reference double-precision modf.
*
* x    - input double as two words ([0] high word, [1] low word).
* fout - receives the fractional part (two words).
* iout - receives the integer part (two words), obtained by truncating
*        x with test_rint.
*
* Both outputs carry the sign of x. Always returns NULL.
*/
char *test_modf(uint32 *x, uint32 *fout, uint32 *iout) {
int ex = (x[0] >> 20) & 0x7FF; /* exponent */
int sign = x[0] & 0x80000000;
uint32 fh, fl;
if (((x[0] & 0x7FFFFFFF) | (!!x[1])) > 0x7FF00000) {
/*
* NaN input: return the same in _both_ outputs.
*/
fout[0] = iout[0] = x[0];
fout[1] = iout[1] = x[1];
return NULL;
}
test_rint(x, iout, 0, 0);
/* iout is x with its fraction bits cleared, so a word-wise subtraction
* leaves exactly those fraction bits and cannot borrow. */
fh = x[0] - iout[0];
fl = x[1] - iout[1];
if (!fh && !fl) { /* no fraction part */
fout[0] = sign;
fout[1] = 0;
return NULL;
}
if (!(iout[0] & 0x7FFFFFFF) && !iout[1]) { /* no integer part */
fout[0] = x[0];
fout[1] = x[1];
return NULL;
}
/* Normalise the leftover fraction bits: shift left until the leading 1
* reaches bit 20 of the high word, lowering the exponent as we go. */
while (!(fh & 0x100000)) {
ex--;
fh = (fh << 1) | ((fl >> 31) & 1);
fl = (fl & 0x7FFFFFFF) << 1;
}
fout[0] = sign | (ex << 20) | (fh & 0xFFFFF);
fout[1] = fl;
return NULL;
}
/*
* test_modff: reference single-precision modff.
*
* x    - input float as one 32-bit word.
* fout - receives the fractional part.
* iout - receives the integer part, obtained by truncating x with
*        test_rintf.
*
* Both outputs carry the sign of x. Always returns NULL.
*/
char *test_modff(uint32 *x, uint32 *fout, uint32 *iout) {
int ex = (*x >> 23) & 0xFF; /* exponent */
int sign = *x & 0x80000000;
uint32 f;
if ((*x & 0x7FFFFFFF) > 0x7F800000) {
/*
* NaN input: return the same in _both_ outputs.
*/
*fout = *iout = *x;
return NULL;
}
test_rintf(x, iout, 0, 0);
/* *iout is *x with its fraction bits cleared, so the difference is
* exactly those fraction bits. */
f = *x - *iout;
if (!f) { /* no fraction part */
*fout = sign;
return NULL;
}
if (!(*iout & 0x7FFFFFFF)) { /* no integer part */
*fout = *x;
return NULL;
}
/* Normalise the leftover fraction bits: shift left until the leading 1
* reaches bit 23, lowering the exponent as we go. */
while (!(f & 0x800000)) {
ex--;
f = f << 1;
}
*fout = sign | (ex << 23) | (f & 0x7FFFFF);
return NULL;
}
/*
 * test_copysign: write to `out` the double x with its sign bit replaced
 * by the sign bit of y. Cannot fail; always returns NULL.
 */
char *test_copysign(uint32 *x, uint32 *y, uint32 *out)
{
    /* bit 31 of the high word from y, everything else from x */
    out[0] = (y[0] & 0x80000000) | (x[0] & 0x7fffffff);
    out[1] = x[1];
    return NULL;
}
/*
 * test_copysignf: write to `out` the float x with its sign bit replaced
 * by the sign bit of y. Cannot fail; always returns NULL.
 */
char *test_copysignf(uint32 *x, uint32 *y, uint32 *out)
{
    /* bit 31 from y, the remaining 31 bits from x */
    out[0] = (y[0] & 0x80000000) | (x[0] & 0x7fffffff);
    return NULL;
}
/*
 * test_isfinite: set out[0] to 1 if the double x is finite (exponent
 * field not all ones), else 0. Always returns NULL.
 */
char *test_isfinite(uint32 *x, uint32 *out)
{
    const uint32 expmask = 0x7ff00000;

    out[0] = ((x[0] & expmask) != expmask) ? 1 : 0;
    return NULL;
}
/*
 * test_isfinitef: set out[0] to 1 if the float x is finite (exponent
 * field not all ones), else 0. Always returns NULL.
 */
char *test_isfinitef(uint32 *x, uint32 *out)
{
    const uint32 expmask = 0x7f800000;

    out[0] = ((x[0] & expmask) != expmask) ? 1 : 0;
    return NULL;
}
/*
 * test_isinff: set out[0] to 1 if the float x is an infinity, i.e. its
 * low 31 bits equal 0x7f800000, else 0. Always returns NULL.
 */
char *test_isinff(uint32 *x, uint32 *out)
{
    uint32 magnitude = x[0] & 0x7fffffff;

    out[0] = (magnitude == 0x7f800000) ? 1 : 0;
    return NULL;
}
/*
 * test_isinf: set out[0] to 1 if the double x is an infinity (exponent
 * 0x7ff with an all-zero fraction), else 0. Always returns NULL.
 */
char *test_isinf(uint32 *x, uint32 *out)
{
    uint32 high = x[0] & 0x7fffffff;
    uint32 low = x[1];

    out[0] = (high == 0x7ff00000 && low == 0) ? 1 : 0;
    return NULL;
}
/*
 * test_isnanf: set out[0] to 1 if the float x is a NaN (exponent 0xff
 * with a nonzero fraction), else 0. Always returns NULL.
 */
char *test_isnanf(uint32 *x, uint32 *out)
{
    uint32 word = x[0];
    int exp_all_ones = (word & 0x7f800000) == 0x7f800000;
    int has_fraction = (word & 0x007fffff) != 0;

    out[0] = (exp_all_ones && has_fraction) ? 1 : 0;
    return NULL;
}
/*
 * test_isnan: set out[0] to 1 if the double x is a NaN (exponent 0x7ff
 * with a nonzero fraction in either word), else 0. Always returns NULL.
 */
char *test_isnan(uint32 *x, uint32 *out)
{
    uint32 high = x[0], low = x[1];
    int exp_all_ones = (high & 0x7ff00000) == 0x7ff00000;
    int has_fraction = (high & 0x000fffff) != 0 || low != 0;

    out[0] = (exp_all_ones && has_fraction) ? 1 : 0;
    return NULL;
}
/*
 * test_isnormalf: set out[0] to 1 if the float x has an exponent field
 * that is neither zero nor all ones, else 0. Always returns NULL.
 */
char *test_isnormalf(uint32 *x, uint32 *out)
{
    uint32 expbits = x[0] & 0x7f800000;

    out[0] = (expbits != 0 && expbits != 0x7f800000) ? 1 : 0;
    return NULL;
}
/*
 * test_isnormal: set out[0] to 1 if the double x has an exponent field
 * that is neither zero nor all ones, else 0. Always returns NULL.
 */
char *test_isnormal(uint32 *x, uint32 *out)
{
    uint32 expbits = x[0] & 0x7ff00000;

    out[0] = (expbits != 0 && expbits != 0x7ff00000) ? 1 : 0;
    return NULL;
}
/*
 * test_signbitf: set out[0] to bit 31 of the float x (1 if the sign
 * bit is set, else 0). Always returns NULL.
 */
char *test_signbitf(uint32 *x, uint32 *out)
{
    out[0] = (x[0] & 0x80000000) ? 1 : 0;
    return NULL;
}
/*
 * test_signbit: set out[0] to bit 31 of the high word of the double x
 * (1 if the sign bit is set, else 0). Always returns NULL.
 */
char *test_signbit(uint32 *x, uint32 *out)
{
    out[0] = (x[0] & 0x80000000) ? 1 : 0;
    return NULL;
}
/*
 * test_fpclassify: classify the double x, writing a code to out[0]:
 *   0 - zero                  (exponent 0, fraction 0)
 *   4 - zero exponent field   (exponent 0, fraction nonzero)
 *   3 - infinity              (exponent 0x7ff, fraction 0)
 *   7 - NaN                   (exponent 0x7ff, fraction nonzero)
 *   5 - anything else
 * Always returns NULL.
 */
char *test_fpclassify(uint32 *x, uint32 *out)
{
    uint32 exponent = (x[0] >> 20) & 0x7ff;
    int has_fraction = (x[0] & 0x000fffff) != 0 || x[1] != 0;

    if (exponent == 0x00)
        out[0] = has_fraction ? 4 : 0;
    else if (exponent == 0x7ff)
        out[0] = has_fraction ? 7 : 3;
    else
        out[0] = 5;
    return NULL;
}
/*
 * test_fpclassifyf: classify the float x, writing a code to out[0]:
 *   0 - zero                  (exponent 0, fraction 0)
 *   4 - zero exponent field   (exponent 0, fraction nonzero)
 *   3 - infinity              (exponent 0xff, fraction 0)
 *   7 - NaN                   (exponent 0xff, fraction nonzero)
 *   5 - anything else
 * Always returns NULL.
 */
char *test_fpclassifyf(uint32 *x, uint32 *out)
{
    uint32 exponent = (x[0] >> 23) & 0xff;
    int has_fraction = (x[0] & 0x007fffff) != 0;

    if (exponent == 0x000)
        out[0] = has_fraction ? 4 : 0;
    else if (exponent == 0xff)
        out[0] = has_fraction ? 7 : 3;
    else
        out[0] = 5;
    return NULL;
}
/*
* Internal function that compares the doubles in x & y (two words each,
* [0] high word, [1] low word) and returns:
*   -3 if either operand is a signaling NaN,
*   -2 if the operands are unordered (a quiet NaN, no signaling NaN),
*   -1 if x < y, 0 if x == y, 1 if x > y.
*/
static int fpcmp4(uint32 *x, uint32 *y)
{
int result = 0;
/*
* Sort out whether results are ordered or not to begin with
* NaNs have exponent 0x7ff, and non-zero fraction. Signaling NaNs take
* higher priority than quiet ones.
*/
if ((x[0] & 0x7fffffff) >= 0x7ff80000) result = -2;
else if ((x[0] & 0x7fffffff) > 0x7ff00000) result = -3;
else if (((x[0] & 0x7fffffff) == 0x7ff00000) && (x[1] != 0)) result = -3;
/* A quiet NaN in y must not downgrade a -3 already set for x. */
if ((y[0] & 0x7fffffff) >= 0x7ff80000 && result != -3) result = -2;
else if ((y[0] & 0x7fffffff) > 0x7ff00000) result = -3;
else if (((y[0] & 0x7fffffff) == 0x7ff00000) && (y[1] != 0)) result = -3;
if (result != 0) return result;
/*
* The two forms of zero are equal
*/
if (((x[0] & 0x7fffffff) == 0) && x[1] == 0 &&
((y[0] & 0x7fffffff) == 0) && y[1] == 0)
return 0;
/*
* If x and y have different signs we can tell that they're not equal
* If x is +ve we have x > y return 1 - otherwise y is +ve return -1
*/
if ((x[0] >> 31) != (y[0] >> 31))
return ((x[0] >> 31) == 0) - ((y[0] >> 31) == 0);
/*
* Now we have both signs the same, let's do an initial compare of the
* values.
*
* Whoever designed IEEE754's floating point formats is very clever and
* earns my undying admiration. Once you remove the sign-bit, the
* floating point numbers can be ordered using the standard <, ==, >
* operators while treating the fp-numbers as integers with that bit-
* pattern.
*/
if ((x[0] & 0x7fffffff) < (y[0] & 0x7fffffff)) result = -1;
else if ((x[0] & 0x7fffffff) > (y[0] & 0x7fffffff)) result = 1;
else if (x[1] < y[1]) result = -1;
else if (x[1] > y[1]) result = 1;
else result = 0;
/*
* Now we return the result - if x is positive (and therefore so is y) we
* return the plain result - otherwise we negate it and return.
*/
if ((x[0] >> 31) == 0) return result;
else return -result;
}
/*
* Internal function that compares the floats in x & y (one word each)
* and returns:
*   -3 if either operand is a signaling NaN,
*   -2 if the operands are unordered (a quiet NaN, no signaling NaN),
*   -1 if x < y, 0 if x == y, 1 if x > y.
*/
static int fpcmp4f(uint32 *x, uint32 *y)
{
int result = 0;
/*
* Sort out whether results are ordered or not to begin with
* NaNs have exponent 0xff, and non-zero fraction - we have to handle all
* signaling cases over the quiet ones
*/
if ((x[0] & 0x7fffffff) >= 0x7fc00000) result = -2;
else if ((x[0] & 0x7fffffff) > 0x7f800000) result = -3;
/* A quiet NaN in y must not downgrade a -3 already set for x. */
if ((y[0] & 0x7fffffff) >= 0x7fc00000 && result != -3) result = -2;
else if ((y[0] & 0x7fffffff) > 0x7f800000) result = -3;
if (result != 0) return result;
/*
* The two forms of zero are equal
*/
if (((x[0] & 0x7fffffff) == 0) && ((y[0] & 0x7fffffff) == 0))
return 0;
/*
* If x and y have different signs we can tell that they're not equal
* If x is +ve we have x > y return 1 - otherwise y is +ve return -1
*/
if ((x[0] >> 31) != (y[0] >> 31))
return ((x[0] >> 31) == 0) - ((y[0] >> 31) == 0);
/*
* Now we have both signs the same, let's do an initial compare of the
* values.
*
* Whoever designed IEEE754's floating point formats is very clever and
* earns my undying admiration. Once you remove the sign-bit, the
* floating point numbers can be ordered using the standard <, ==, >
* operators while treating the fp-numbers as integers with that bit-
* pattern.
*/
if ((x[0] & 0x7fffffff) < (y[0] & 0x7fffffff)) result = -1;
else if ((x[0] & 0x7fffffff) > (y[0] & 0x7fffffff)) result = 1;
else result = 0;
/*
* Now we return the result - if x is positive (and therefore so is y) we
* return the plain result - otherwise we negate it and return.
*/
if ((x[0] >> 31) == 0) return result;
else return -result;
}
/*
 * test_isgreater: set *out to 1 if double x > y, else 0.
 * Returns "i" when fpcmp4 reports a signaling NaN, otherwise NULL.
 */
char *test_isgreater(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    *out = (cmp == 1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_isgreaterequal: set *out to 1 if double x >= y, else 0.
 * Returns "i" when fpcmp4 reports a signaling NaN, otherwise NULL.
 */
char *test_isgreaterequal(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    /* the unordered codes (-2, -3) are negative, so they yield 0 */
    *out = (cmp >= 0);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_isless: set *out to 1 if double x < y, else 0.
 * Returns "i" when fpcmp4 reports a signaling NaN, otherwise NULL.
 */
char *test_isless(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    *out = (cmp == -1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_islessequal: set *out to 1 if double x <= y, else 0.
 * Returns "i" when fpcmp4 reports a signaling NaN, otherwise NULL.
 */
char *test_islessequal(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    *out = (cmp == 0) || (cmp == -1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_islessgreater: set *out to 1 if double x < y or x > y, else 0.
 * Returns "i" when fpcmp4 reports a signaling NaN, otherwise NULL.
 */
char *test_islessgreater(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4(x, y);

    *out = (cmp == 1) || (cmp == -1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_isunordered: set *out to 1 if the doubles x and y are unordered
 * (fpcmp4 reports a quiet or signaling NaN), else 0.
 * Returns "i" when a signaling NaN was involved, otherwise NULL.
 *
 * The former test_isnormal() calls were dropped: they only wrote
 * scratch values into *out that were overwritten, and fed a local that
 * was never read.
 */
char *test_isunordered(uint32 *x, uint32 *y, uint32 *out)
{
    int result = fpcmp4(x, y);

    *out = (result == -2) || (result == -3);
    return result == -3 ? "i" : NULL;
}
/*
 * test_isgreaterf: set *out to 1 if float x > y, else 0.
 * Returns "i" when fpcmp4f reports a signaling NaN, otherwise NULL.
 */
char *test_isgreaterf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    *out = (cmp == 1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_isgreaterequalf: set *out to 1 if float x >= y, else 0.
 * Returns "i" when fpcmp4f reports a signaling NaN, otherwise NULL.
 */
char *test_isgreaterequalf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    /* the unordered codes (-2, -3) are negative, so they yield 0 */
    *out = (cmp >= 0);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_islessf: set *out to 1 if float x < y, else 0.
 * Returns "i" when fpcmp4f reports a signaling NaN, otherwise NULL.
 */
char *test_islessf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    *out = (cmp == -1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_islessequalf: set *out to 1 if float x <= y, else 0.
 * Returns "i" when fpcmp4f reports a signaling NaN, otherwise NULL.
 */
char *test_islessequalf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    *out = (cmp == 0) || (cmp == -1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_islessgreaterf: set *out to 1 if float x < y or x > y, else 0.
 * Returns "i" when fpcmp4f reports a signaling NaN, otherwise NULL.
 */
char *test_islessgreaterf(uint32 *x, uint32 *y, uint32 *out)
{
    const int cmp = fpcmp4f(x, y);

    *out = (cmp == 1) || (cmp == -1);
    if (cmp == -3)
        return "i";
    return NULL;
}
/*
 * test_isunorderedf: set *out to 1 if the floats x and y are unordered
 * (fpcmp4f reports a quiet or signaling NaN), else 0.
 * Returns "i" when a signaling NaN was involved, otherwise NULL.
 *
 * The former test_isnormalf() calls were dropped: they only wrote
 * scratch values into *out that were overwritten, and fed a local that
 * was never read.
 */
char *test_isunorderedf(uint32 *x, uint32 *y, uint32 *out)
{
    int result = fpcmp4f(x, y);

    *out = (result == -2) || (result == -3);
    return result == -3 ? "i" : NULL;
}

View file

@ -0,0 +1,53 @@
/*
* semi.h: header for semi.c
*
* Copyright (c) 1999-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef test_semi_h
#define test_semi_h

#include "types.h"

/*
 * Reference implementations of the seminumerical functions. Operands
 * and results are passed as raw IEEE words: doubles use two uint32s
 * ([0] high word, [1] low word), floats use one. Each function returns
 * NULL or a status string describing the expected error/exception.
 */

/* Double precision. */
char *test_ceil(uint32 *in, uint32 *out);
char *test_floor(uint32 *in, uint32 *out);
char *test_fmod(uint32 *a, uint32 *b, uint32 *out);
char *test_ldexp(uint32 *x, uint32 *n, uint32 *out);
char *test_frexp(uint32 *x, uint32 *out, uint32 *nout);
/* fout receives the fractional part, iout the integer part. */
char *test_modf(uint32 *x, uint32 *fout, uint32 *iout);

/* Single precision. */
char *test_ceilf(uint32 *in, uint32 *out);
char *test_floorf(uint32 *in, uint32 *out);
char *test_fmodf(uint32 *a, uint32 *b, uint32 *out);
char *test_ldexpf(uint32 *x, uint32 *n, uint32 *out);
char *test_frexpf(uint32 *x, uint32 *out, uint32 *nout);
/* fout receives the fractional part, iout the integer part. */
char *test_modff(uint32 *x, uint32 *fout, uint32 *iout);

/* Sign manipulation and classification. */
char *test_copysign(uint32 *x, uint32 *y, uint32 *out);
char *test_copysignf(uint32 *x, uint32 *y, uint32 *out);
char *test_isfinite(uint32 *x, uint32 *out);
char *test_isfinitef(uint32 *x, uint32 *out);
char *test_isinf(uint32 *x, uint32 *out);
char *test_isinff(uint32 *x, uint32 *out);
char *test_isnan(uint32 *x, uint32 *out);
char *test_isnanf(uint32 *x, uint32 *out);
char *test_isnormal(uint32 *x, uint32 *out);
char *test_isnormalf(uint32 *x, uint32 *out);
char *test_signbit(uint32 *x, uint32 *out);
char *test_signbitf(uint32 *x, uint32 *out);
char *test_fpclassify(uint32 *x, uint32 *out);
char *test_fpclassifyf(uint32 *x, uint32 *out);

/* Comparisons, double precision. */
char *test_isgreater(uint32 *x, uint32 *y, uint32 *out);
char *test_isgreaterequal(uint32 *x, uint32 *y, uint32 *out);
char *test_isless(uint32 *x, uint32 *y, uint32 *out);
char *test_islessequal(uint32 *x, uint32 *y, uint32 *out);
char *test_islessgreater(uint32 *x, uint32 *y, uint32 *out);
char *test_isunordered(uint32 *x, uint32 *y, uint32 *out);

/* Comparisons, single precision. */
char *test_isgreaterf(uint32 *x, uint32 *y, uint32 *out);
char *test_isgreaterequalf(uint32 *x, uint32 *y, uint32 *out);
char *test_islessf(uint32 *x, uint32 *y, uint32 *out);
char *test_islessequalf(uint32 *x, uint32 *y, uint32 *out);
char *test_islessgreaterf(uint32 *x, uint32 *y, uint32 *out);
char *test_isunorderedf(uint32 *x, uint32 *y, uint32 *out);

#endif

View file

@ -0,0 +1,25 @@
/*
* types.h
*
* Copyright (c) 2005-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef mathtest_types_h
#define mathtest_types_h
#include <limits.h>
#if UINT_MAX == 4294967295
typedef unsigned int uint32;
typedef int int32;
#define I32 ""
#elif ULONG_MAX == 4294967295
typedef unsigned long uint32;
typedef long int32;
#define I32 "l"
#else
#error Could not find an unsigned 32-bit integer type
#endif
#endif

View file

@ -0,0 +1,261 @@
/*
* wrappers.c - wrappers to modify output of MPFR/MPC test functions
*
* Copyright (c) 2014-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include "intern.h"
/*
 * wrapper_init: reset a wrapper context to "no operands, no result,
 * no regeneration needed".
 */
void wrapper_init(wrapperctx *ctx)
{
    int slot;

    ctx->nops = 0;
    ctx->nresults = 0;
    ctx->need_regen = 0;

    ctx->mpfr_result = NULL;
    ctx->mpc_result = NULL;
    ctx->ieee_result = NULL;

    /* clear both operand slots */
    for (slot = 0; slot < 2; slot++) {
        ctx->mpfr_ops[slot] = NULL;
        ctx->mpc_ops[slot] = NULL;
        ctx->ieee_ops[slot] = NULL;
    }
}
/*
 * wrapper_op_real: register the next operand as a real value, recording
 * its MPFR form `r`, its size code and its IEEE words. At most two
 * operands may be registered.
 */
void wrapper_op_real(wrapperctx *ctx, const mpfr_t r,
                     int size, const uint32 *ieee)
{
    assert(ctx->nops < 2);
    ctx->size_ops[ctx->nops] = size;
    ctx->ieee_ops[ctx->nops] = ieee;
    ctx->mpfr_ops[ctx->nops] = r;
    ctx->nops += 1;
}
/*
 * wrapper_op_complex: register the next operand as a complex value,
 * recording its MPC form `c`, its size code and its IEEE words. At most
 * two operands may be registered.
 */
void wrapper_op_complex(wrapperctx *ctx, const mpc_t c,
                        int size, const uint32 *ieee)
{
    assert(ctx->nops < 2);
    ctx->size_ops[ctx->nops] = size;
    ctx->ieee_ops[ctx->nops] = ieee;
    ctx->mpc_ops[ctx->nops] = c;
    ctx->nops += 1;
}
/*
 * wrapper_result_real: register the (single) result as a real value,
 * recording its MPFR form `r`, its size code and its IEEE words.
 */
void wrapper_result_real(wrapperctx *ctx, mpfr_t r,
                         int size, uint32 *ieee)
{
    assert(ctx->nresults < 1);
    ctx->size_result = size;
    ctx->ieee_result = ieee;
    ctx->mpfr_result = r;
    ctx->nresults += 1;
}
/*
 * wrapper_result_complex: register the (single) result as a complex
 * value, recording its MPC form `c`, its size code and its IEEE words.
 */
void wrapper_result_complex(wrapperctx *ctx, mpc_t c,
                            int size, uint32 *ieee)
{
    assert(ctx->nresults < 1);
    ctx->size_result = size;
    ctx->ieee_result = ieee;
    ctx->mpc_result = c;
    ctx->nresults += 1;
}
/*
 * Invoke each wrapper in 'wrappers' on 'ctx', then the universal
 * wrapper.  Returns the context's need_regen flag, i.e. nonzero when a
 * wrapper rewrote the MPFR/MPC result.
 */
int wrapper_run(wrapperctx *ctx, wrapperfunc wrappers[MAXWRAPPERS])
{
    int idx = 0;

    /* The list ends at the first NULL entry or after MAXWRAPPERS entries. */
    while (idx < MAXWRAPPERS && wrappers[idx]) {
        wrappers[idx](ctx);
        idx++;
    }

    /* The universal wrapper always runs, after the listed ones. */
    universal_wrapper(ctx);

    return ctx->need_regen;
}
/*
 * Return the MPFR value of a real operand (op >= 0) or of the real
 * result (op < 0).  The requested value must have been registered.
 */
mpfr_srcptr wrapper_get_mpfr(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op]);
        return ctx->mpfr_ops[op];
    }

    assert(ctx->mpfr_result);
    return ctx->mpfr_result;
}
/*
 * Return the IEEE words of a real operand (op >= 0) or of the real
 * result (op < 0).  The assertions check that the corresponding MPFR
 * value was registered, i.e. that the item really is real.
 */
const uint32 *wrapper_get_ieee(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op]);
        return ctx->ieee_ops[op];
    }

    assert(ctx->mpfr_result);
    return ctx->ieee_result;
}
/* Return how many operands have been registered in 'ctx'. */
int wrapper_get_nops(wrapperctx *ctx)
{
    int count = ctx->nops;

    return count;
}
/*
 * Return the recorded size of an operand (op >= 0) or of the result
 * (op < 0).  Works for both real and complex items; the item must have
 * been registered.
 */
int wrapper_get_size(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op] || ctx->mpc_ops[op]);
        return ctx->size_ops[op];
    }

    assert(ctx->mpfr_result || ctx->mpc_result);
    return ctx->size_result;
}
/*
 * Return 1 if the operand (op >= 0) or result (op < 0) was registered
 * as complex, 0 if it was registered as real.  The item must have been
 * registered.
 */
int wrapper_is_complex(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpfr_ops[op] || ctx->mpc_ops[op]);
        return ctx->mpc_ops[op] ? 1 : 0;
    }

    assert(ctx->mpfr_result || ctx->mpc_result);
    return ctx->mpc_result ? 1 : 0;
}
/*
 * Return the MPC value of a complex operand (op >= 0) or of the complex
 * result (op < 0).  The requested value must have been registered.
 */
mpc_srcptr wrapper_get_mpc(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return ctx->mpc_ops[op];
    }

    assert(ctx->mpc_result);
    return ctx->mpc_result;
}
/*
 * Return the real part, as an MPFR value, of a complex operand
 * (op >= 0) or of the complex result (op < 0).
 */
mpfr_srcptr wrapper_get_mpfr_r(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return mpc_realref(ctx->mpc_ops[op]);
    }

    assert(ctx->mpc_result);
    return mpc_realref(ctx->mpc_result);
}
/*
 * Return the imaginary part, as an MPFR value, of a complex operand
 * (op >= 0) or of the complex result (op < 0).
 */
mpfr_srcptr wrapper_get_mpfr_i(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return mpc_imagref(ctx->mpc_ops[op]);
    }

    assert(ctx->mpc_result);
    return mpc_imagref(ctx->mpc_result);
}
/*
 * Return the IEEE words of the real part of a complex operand
 * (op >= 0) or of the complex result (op < 0).  The real part sits at
 * the start of the registered IEEE buffer.
 */
const uint32 *wrapper_get_ieee_r(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return ctx->ieee_ops[op];
    }

    assert(ctx->mpc_result);
    return ctx->ieee_result;
}
/*
 * Return the IEEE words of the imaginary part of a complex operand
 * (op >= 0) or of the complex result (op < 0).  The imaginary part
 * starts at word 2 of an operand buffer and at word 4 of the result
 * buffer.
 */
const uint32 *wrapper_get_ieee_i(wrapperctx *ctx, int op)
{
    if (op >= 0) {
        assert(ctx->mpc_ops[op]);
        return &ctx->ieee_ops[op][2];
    }

    assert(ctx->mpc_result);
    return &ctx->ieee_result[4];
}
/*
 * OR the top bit of 'sign' into the first IEEE word of the real result.
 * Only bit 31 of 'sign' is used; a sign bit that is already set in the
 * result is never cleared.
 */
void wrapper_set_sign(wrapperctx *ctx, uint32 sign)
{
    uint32 signbit = sign & 0x80000000U;

    assert(ctx->mpfr_result);
    ctx->ieee_result[0] = ctx->ieee_result[0] | signbit;
}
/*
 * OR the top bit of 'sign' into the first IEEE word of the real part of
 * the complex result (word 0 of the result buffer).  Only bit 31 of
 * 'sign' is used; an already-set sign bit is never cleared.
 */
void wrapper_set_sign_r(wrapperctx *ctx, uint32 sign)
{
    uint32 signbit = sign & 0x80000000U;

    assert(ctx->mpc_result);
    ctx->ieee_result[0] = ctx->ieee_result[0] | signbit;
}
/*
 * OR the top bit of 'sign' into the first IEEE word of the imaginary
 * part of the complex result (word 4 of the result buffer).  Only bit
 * 31 of 'sign' is used; an already-set sign bit is never cleared.
 */
void wrapper_set_sign_i(wrapperctx *ctx, uint32 sign)
{
    uint32 signbit = sign & 0x80000000U;

    assert(ctx->mpc_result);
    ctx->ieee_result[4] = ctx->ieee_result[4] | signbit;
}
/*
 * Replace the real result with NaN and flag the IEEE result for
 * regeneration from the MPFR value.
 */
void wrapper_set_nan(wrapperctx *ctx)
{
    assert(ctx->mpfr_result);
    ctx->need_regen = 1;
    mpfr_set_nan(ctx->mpfr_result);
}
/*
 * Replace the real part of the complex result with NaN and flag the
 * IEEE result for regeneration.
 */
void wrapper_set_nan_r(wrapperctx *ctx)
{
    assert(ctx->mpc_result);
    ctx->need_regen = 1;
    /* FIXME: is there a better way than poking the real part directly? */
    mpfr_set_nan(mpc_realref(ctx->mpc_result));
}
/*
 * Replace the imaginary part of the complex result with NaN and flag
 * the IEEE result for regeneration.
 */
void wrapper_set_nan_i(wrapperctx *ctx)
{
    assert(ctx->mpc_result);
    ctx->need_regen = 1;
    /* FIXME: is there a better way than poking the imag part directly? */
    mpfr_set_nan(mpc_imagref(ctx->mpc_result));
}
/*
 * Replace the real result with the integer 'val' (round-to-nearest)
 * and flag the IEEE result for regeneration.
 */
void wrapper_set_int(wrapperctx *ctx, int val)
{
    assert(ctx->mpfr_result);
    ctx->need_regen = 1;
    mpfr_set_si(ctx->mpfr_result, val, GMP_RNDN);
}
/*
 * Replace the real part of the complex result with the integer 'val'
 * (round-to-nearest) and flag the IEEE result for regeneration.
 */
void wrapper_set_int_r(wrapperctx *ctx, int val)
{
    assert(ctx->mpc_result);
    ctx->need_regen = 1;
    mpfr_set_si(mpc_realref(ctx->mpc_result), val, GMP_RNDN);
}
/*
 * Replace the imaginary part of the complex result with the integer
 * 'val' (round-to-nearest) and flag the IEEE result for regeneration.
 *
 * The write must go through mpc_imagref(): using mpc_realref() here
 * would overwrite the real part and leave the imaginary part untouched.
 */
void wrapper_set_int_i(wrapperctx *ctx, int val)
{
    assert(ctx->mpc_result);
    mpfr_set_si(mpc_imagref(ctx->mpc_result), val, GMP_RNDN);
    ctx->need_regen = 1;
}
/*
 * Replace the real result with a copy of 'val' (round-to-nearest) and
 * flag the IEEE result for regeneration.
 */
void wrapper_set_mpfr(wrapperctx *ctx, const mpfr_t val)
{
    assert(ctx->mpfr_result);
    ctx->need_regen = 1;
    mpfr_set(ctx->mpfr_result, val, GMP_RNDN);
}
/*
 * Replace the real part of the complex result with a copy of 'val'
 * (round-to-nearest) and flag the IEEE result for regeneration.
 */
void wrapper_set_mpfr_r(wrapperctx *ctx, const mpfr_t val)
{
    assert(ctx->mpc_result);
    ctx->need_regen = 1;
    mpfr_set(mpc_realref(ctx->mpc_result), val, GMP_RNDN);
}
/*
 * Replace the imaginary part of the complex result with a copy of
 * 'val' (round-to-nearest) and flag the IEEE result for regeneration.
 *
 * The write must go through mpc_imagref(): using mpc_realref() here
 * would overwrite the real part and leave the imaginary part untouched.
 */
void wrapper_set_mpfr_i(wrapperctx *ctx, const mpfr_t val)
{
    assert(ctx->mpc_result);
    mpfr_set(mpc_imagref(ctx->mpc_result), val, GMP_RNDN);
    ctx->need_regen = 1;
}

View file

@ -0,0 +1,114 @@
/*
* wrappers.h - wrappers to modify output of MPFR/MPC test functions
*
* Copyright (c) 2014-2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/*
 * State shared between the test harness and the result wrappers: the
 * operands and result of one reference-function call, in both MPFR/MPC
 * and IEEE-word form.
 */
typedef struct {
/* Structure type should be considered opaque outside wrappers.c,
* though we have to define it here so its size is known. */
int nops; /* operands registered so far by wrapper_op_* (at most 2) */
int nresults; /* results registered so far by wrapper_result_* (at most 1) */
mpfr_srcptr mpfr_ops[2]; /* per operand: MPFR value, NULL unless real */
mpfr_ptr mpfr_result; /* MPFR result, NULL unless a real result was registered */
mpc_srcptr mpc_ops[2]; /* per operand: MPC value, NULL unless complex */
mpc_ptr mpc_result; /* MPC result, NULL unless a complex result was registered */
const uint32 *ieee_ops[2]; /* per operand: IEEE words supplied at registration */
uint32 *ieee_result; /* writable IEEE words of the result */
int size_ops[2]; /* per operand: 'size' supplied at registration */
int size_result; /* 'size' supplied when the result was registered */
int need_regen; /* set to 1 by the wrapper_set_nan/int/mpfr family; returned by wrapper_run */
} wrapperctx;
/* A wrapper callback: inspects and possibly adjusts the result held in ctx. */
typedef void (*wrapperfunc)(wrapperctx *ctx);
/* Length of the wrapper array that wrapper_run() accepts. */
#define MAXWRAPPERS 3
/*
* Functions for the test harness to call.
*
* When the test harness executes a test function, it should
* initialise a wrapperctx with wrapper_init, then provide all the
* operands and results in both mpfr/mpc and IEEE (+ extrabits)
* formats via wrapper_op_* and wrapper_result_*. Then it should run
* the function's wrappers using wrapper_run(), and if that returns
* true then the primary result has been rewritten in mpfr/mpc format
* and it should therefore retranslate into IEEE.
*
* 'size' in all prototypes below represents an FP type by giving the
* number of 32-bit words it requires, so 1=float and 2=double. Input
* operands will be that many words (or that many for both their real
* and imag parts); outputs will have one extra word for 'extrabits'.
*
* This system only applies at all to reference functions using
* mpfr/mpc. The seminumerical functions we implement in pure IEEE
* form are expected to handle all their own special cases correctly.
*/
void wrapper_init(wrapperctx *ctx);
/* Real operand. */
void wrapper_op_real(wrapperctx *ctx, const mpfr_t r,
int size, const uint32 *ieee);
/* Complex operand. Real part starts at ieee[0], the imag part at ieee[2]. */
void wrapper_op_complex(wrapperctx *ctx, const mpc_t c,
int size, const uint32 *ieee);
/* Real result. ieee contains size+1 words, as discussed above. */
void wrapper_result_real(wrapperctx *ctx, mpfr_t r,
int size, uint32 *ieee);
/* Complex result. ieee contains size+1 words of real part starting at
* ieee[0], and another size+1 of imag part starting at ieee[4]. */
void wrapper_result_complex(wrapperctx *ctx, mpc_t c,
int size, uint32 *ieee);
int wrapper_run(wrapperctx *ctx, wrapperfunc wrappers[MAXWRAPPERS]);
/*
* Functions for wrappers to call. 'op' indicates which operand is
* being requested: 0,1 means first and second, and -1 means the
* result.
*/
mpfr_srcptr wrapper_get_mpfr(wrapperctx *ctx, int op);
const uint32 *wrapper_get_ieee(wrapperctx *ctx, int op);
mpc_srcptr wrapper_get_mpc(wrapperctx *ctx, int op);
mpfr_srcptr wrapper_get_mpfr_r(wrapperctx *ctx, int op);
mpfr_srcptr wrapper_get_mpfr_i(wrapperctx *ctx, int op);
const uint32 *wrapper_get_ieee_r(wrapperctx *ctx, int op);
const uint32 *wrapper_get_ieee_i(wrapperctx *ctx, int op);
/* Query operand count + types */
int wrapper_get_nops(wrapperctx *ctx);
int wrapper_get_size(wrapperctx *ctx, int op);
int wrapper_is_complex(wrapperctx *ctx, int op);
/* Change just the sign of the result. Only the top bit of 'sign' is used. */
void wrapper_set_sign(wrapperctx *ctx, uint32 sign);
void wrapper_set_sign_r(wrapperctx *ctx, uint32 sign);
void wrapper_set_sign_i(wrapperctx *ctx, uint32 sign);
/* Set a result to NaN. */
void wrapper_set_nan(wrapperctx *ctx);
void wrapper_set_nan_r(wrapperctx *ctx);
void wrapper_set_nan_i(wrapperctx *ctx);
/* Set a result to an integer value (converted to the appropriate
* float format). */
void wrapper_set_int(wrapperctx *ctx, int val);
void wrapper_set_int_r(wrapperctx *ctx, int val);
void wrapper_set_int_i(wrapperctx *ctx, int val);
/* Set a result to a new MPFR float. */
void wrapper_set_mpfr(wrapperctx *ctx, const mpfr_t val);
void wrapper_set_mpfr_r(wrapperctx *ctx, const mpfr_t val);
void wrapper_set_mpfr_i(wrapperctx *ctx, const mpfr_t val);
/*
* A universal wrapper called for _all_ functions, that doesn't have
* to be specified individually everywhere.
*/
void universal_wrapper(wrapperctx *ctx);

View file

@ -0,0 +1,315 @@
#!/bin/bash
# ULP error check script.
#
# Copyright (c) 2019-2020, Arm Limited.
# SPDX-License-Identifier: MIT
#set -x
# Abort on the first failing command and on use of an unset variable.
set -eu

# cd to bin directory (the directory containing this script).  dirname is
# used rather than "${0%/*}" so that an invocation whose $0 contains no
# slash (e.g. "bash runulp.sh" from inside the bin directory) resolves to
# "." instead of trying to cd into the script file itself.
cd "$(dirname "$0")"

# Rounding modes to run the scalar tests under; each is passed to ulp
# via -r (presumably nearest, up, down and toward zero).
rmodes='n u d z'
#rmodes=n
# Extra ulp flags: -q unless ULPFLAGS is set to a non-empty value.
flags="${ULPFLAGS:--q}"
# Optional command prefix (e.g. an emulator) built from the script's
# arguments; it is placed in front of every ./ulp invocation.
emu="$@"
# Running totals maintained by t().
FAIL=0
PASS=0
# Run one ulp test and count the outcome in PASS or FAIL.
# The error limit handed to ulp via -e is $L when the rounding mode $r
# is "n" and $Ldir otherwise; all arguments are forwarded to ulp.
t() {
	if [ $r = "n" ]
	then
		Lt=$L
	else
		Lt=$Ldir
	fi
	if $emu ./ulp -r $r -e $Lt $flags "$@"
	then
		PASS=$((PASS+1))
	else
		FAIL=$((FAIL+1))
	fi
}
# Run ulp with -f -q on the given arguments, discarding its standard
# output; the exit status of ulp is the function's status.
check() {
	$emu ./ulp -f -q "$@" 1> /dev/null
}
# Scalar tests, repeated for every rounding mode in $rmodes.
# Before each group, L is set to the error limit t() uses when r is "n";
# Ldir is the limit t() uses for every other rounding mode.
Ldir=0.5
for r in $rmodes
do
# Double precision: exp
L=0.01
t exp 0 0xffff000000000000 10000
t exp 0x1p-6 0x1p6 40000
t exp -0x1p-6 -0x1p6 40000
t exp 633.3 733.3 10000
t exp -633.3 -777.3 10000
# Double precision: exp2
L=0.01
t exp2 0 0xffff000000000000 10000
t exp2 0x1p-6 0x1p6 40000
t exp2 -0x1p-6 -0x1p6 40000
t exp2 633.3 733.3 10000
t exp2 -633.3 -777.3 10000
# Double precision: log
L=0.02
t log 0 0xffff000000000000 10000
t log 0x1p-4 0x1p4 40000
t log 0 inf 40000
# Double precision: log2
L=0.05
t log2 0 0xffff000000000000 10000
t log2 0x1p-4 0x1p4 40000
t log2 0 inf 40000
# Double precision: pow (two operand ranges separated by "x")
L=0.05
t pow 0.5 2.0 x 0 inf 20000
t pow -0.5 -2.0 x 0 inf 20000
t pow 0.5 2.0 x -0 -inf 20000
t pow -0.5 -2.0 x -0 -inf 20000
t pow 0.5 2.0 x 0x1p-10 0x1p10 40000
t pow 0.5 2.0 x -0x1p-10 -0x1p10 40000
t pow 0 inf x 0.5 2.0 80000
t pow 0 inf x -0.5 -2.0 80000
t pow 0x1.fp-1 0x1.08p0 x 0x1p8 0x1p17 80000
t pow 0x1.fp-1 0x1.08p0 x -0x1p8 -0x1p17 80000
t pow 0 0x1p-1000 x 0 1.0 50000
t pow 0x1p1000 inf x 0 1.0 50000
t pow 0x1.ffffffffffff0p-1 0x1.0000000000008p0 x 0x1p60 0x1p68 50000
t pow 0x1.ffffffffff000p-1 0x1p0 x 0x1p50 0x1p52 50000
t pow -0x1.ffffffffff000p-1 -0x1p0 x 0x1p50 0x1p52 50000
# Double precision: erf.  Ldir is raised for this group only and put
# back to 0.5 afterwards.
L=1.0
Ldir=0.9
t erf 0 0xffff000000000000 10000
t erf 0x1p-1022 0x1p-26 40000
t erf -0x1p-1022 -0x1p-26 40000
t erf 0x1p-26 0x1p3 40000
t erf -0x1p-26 -0x1p3 40000
t erf 0 inf 40000
Ldir=0.5
# Single precision: expf
L=0.01
t expf 0 0xffff0000 10000
t expf 0x1p-14 0x1p8 50000
t expf -0x1p-14 -0x1p8 50000
# Single precision: exp2f
L=0.01
t exp2f 0 0xffff0000 10000
t exp2f 0x1p-14 0x1p8 50000
t exp2f -0x1p-14 -0x1p8 50000
# Single precision: logf
L=0.32
t logf 0 0xffff0000 10000
t logf 0x1p-4 0x1p4 50000
t logf 0 inf 50000
# Single precision: log2f
L=0.26
t log2f 0 0xffff0000 10000
t log2f 0x1p-4 0x1p4 50000
t log2f 0 inf 50000
# Single precision: sinf
L=0.06
t sinf 0 0xffff0000 10000
t sinf 0x1p-14 0x1p54 50000
t sinf -0x1p-14 -0x1p54 50000
# Single precision: cosf
L=0.06
t cosf 0 0xffff0000 10000
t cosf 0x1p-14 0x1p54 50000
t cosf -0x1p-14 -0x1p54 50000
# Single precision: sine output of sincosf
L=0.06
t sincosf_sinf 0 0xffff0000 10000
t sincosf_sinf 0x1p-14 0x1p54 50000
t sincosf_sinf -0x1p-14 -0x1p54 50000
# Single precision: cosine output of sincosf
L=0.06
t sincosf_cosf 0 0xffff0000 10000
t sincosf_cosf 0x1p-14 0x1p54 50000
t sincosf_cosf -0x1p-14 -0x1p54 50000
# Single precision: powf
L=0.4
t powf 0x1p-1 0x1p1 x 0x1p-7 0x1p7 50000
t powf 0x1p-1 0x1p1 x -0x1p-7 -0x1p7 50000
t powf 0x1p-70 0x1p70 x 0x1p-1 0x1p1 50000
t powf 0x1p-70 0x1p70 x -0x1p-1 -0x1p1 50000
t powf 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p14 50000
t powf 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
# Single precision: erff.  Ldir is raised for this group only and put
# back to 0.5 afterwards.
L=0.6
Ldir=0.9
t erff 0 0xffff0000 10000
t erff 0x1p-127 0x1p-26 40000
t erff -0x1p-127 -0x1p-26 40000
t erff 0x1p-26 0x1p3 40000
t erff -0x1p-26 -0x1p3 40000
t erff 0 inf 40000
Ldir=0.5
done
# vector functions
# These run in rounding mode "n" only, with -f appended to the ulp flags.
r='n'
Ldir=0.5
flags="${ULPFLAGS:--q} -f"

# Each run* flag is 1 when the matching check succeeds and empty
# otherwise; the table-driven loop below only runs entries whose flag is 1.
if check __s_exp 1
then
	runs=1
else
	runs=
fi
if check __v_exp 1
then
	runv=1
else
	runv=
fi
if check __vn_exp 1
then
	runvn=1
else
	runvn=
fi
# Test ranges for the vector tests, one variable per function group
# (range_<group>).  Each non-empty line of a table becomes the trailing
# arguments of one ulp invocation for that group.
range_exp='
0 0xffff000000000000 10000
0x1p-6 0x1p6 400000
-0x1p-6 -0x1p6 400000
633.3 733.3 10000
-633.3 -777.3 10000
'
range_log='
0 0xffff000000000000 10000
0x1p-4 0x1p4 400000
0 inf 400000
'
range_pow='
0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000
0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000
0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000
0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000
0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000
0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000
'
range_sin='
0 0xffff000000000000 10000
0x1p-4 0x1p4 400000
-0x1p-23 0x1p23 400000
'
# cos shares the sin ranges.
range_cos="$range_sin"
range_expf='
0 0xffff0000 10000
0x1p-14 0x1p8 500000
-0x1p-14 -0x1p8 500000
'
# The expf_1u, exp2f and exp2f_1u groups share the expf ranges.
range_expf_1u="$range_expf"
range_exp2f="$range_expf"
range_exp2f_1u="$range_expf"
range_logf='
0 0xffff0000 10000
0x1p-4 0x1p4 500000
'
range_sinf='
0 0xffff0000 10000
0x1p-4 0x1p4 300000
-0x1p-9 -0x1p9 300000
'
# cosf shares the sinf ranges.
range_cosf="$range_sinf"
range_powf='
0x1p-1 0x1p1 x 0x1p-7 0x1p7 50000
0x1p-1 0x1p1 x -0x1p-7 -0x1p7 50000
0x1p-70 0x1p70 x 0x1p-1 0x1p1 50000
0x1p-70 0x1p70 x -0x1p-1 -0x1p1 50000
0x1.ep-1 0x1.1p0 x 0x1p8 0x1p14 50000
0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p14 50000
'
# error limits
# One variable per function group (L_<group>); the table-driven loop
# copies the group's value into L before calling t, which passes it to
# ulp via -e.
L_exp=1.9
L_log=1.2
L_pow=0.05
L_sin=3.0
L_cos=3.0
L_expf=1.49
L_expf_1u=0.4
L_exp2f=1.49
L_exp2f_1u=0.4
L_logf=2.9
L_sinf=1.4
L_cosf=1.4
L_powf=2.1
# Table-driven vector tests.  Each line of the table fed to this loop is
# "group symbol run": G names the range/limit group, F is the symbol
# given to ulp, and R is the expanded run flag for that variant.
while read G F R
do
# Skip entries whose run flag is not exactly 1; this also drops the
# table's header line, whose third field is not 1.
[ "$R" = 1 ] || continue
case "$G" in \#*) continue ;; esac
# Fetch the group's range table and error limit by variable name.
eval range="\${range_$G}"
eval L="\${L_$G}"
# One ulp run per non-empty, non-comment line of the range table.
while read X
do
[ -n "$X" ] || continue
case "$X" in \#*) continue ;; esac
t $F $X
done << EOF
$range
EOF
done << EOF
# group symbol run
exp __s_exp $runs
exp __v_exp $runv
exp __vn_exp $runvn
exp _ZGVnN2v_exp $runvn
log __s_log $runs
log __v_log $runv
log __vn_log $runvn
log _ZGVnN2v_log $runvn
pow __s_pow $runs
pow __v_pow $runv
pow __vn_pow $runvn
pow _ZGVnN2vv_pow $runvn
sin __s_sin $runs
sin __v_sin $runv
sin __vn_sin $runvn
sin _ZGVnN2v_sin $runvn
cos __s_cos $runs
cos __v_cos $runv
cos __vn_cos $runvn
cos _ZGVnN2v_cos $runvn
expf __s_expf $runs
expf __v_expf $runv
expf __vn_expf $runvn
expf _ZGVnN4v_expf $runvn
expf_1u __s_expf_1u $runs
expf_1u __v_expf_1u $runv
expf_1u __vn_expf_1u $runvn
exp2f __s_exp2f $runs
exp2f __v_exp2f $runv
exp2f __vn_exp2f $runvn
exp2f _ZGVnN4v_exp2f $runvn
exp2f_1u __s_exp2f_1u $runs
exp2f_1u __v_exp2f_1u $runv
exp2f_1u __vn_exp2f_1u $runvn
logf __s_logf $runs
logf __v_logf $runv
logf __vn_logf $runvn
logf _ZGVnN4v_logf $runvn
sinf __s_sinf $runs
sinf __v_sinf $runv
sinf __vn_sinf $runvn
sinf _ZGVnN4v_sinf $runvn
cosf __s_cosf $runs
cosf __v_cosf $runv
cosf __vn_cosf $runvn
cosf _ZGVnN4v_cosf $runvn
powf __s_powf $runs
powf __v_powf $runv
powf __vn_powf $runvn
powf _ZGVnN4vv_powf $runvn
EOF
# Print the totals and exit with status 1 unless no test failed.
if ! [ 0 -eq $FAIL ]
then
	echo "FAILED $FAIL PASSED $PASS"
	exit 1
fi

View file

@ -0,0 +1,25 @@
; cosf.tst - Directed test cases for SP cosine
;
; Copyright (c) 2007-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=cosf op1=7fc00001 result=7fc00001 errno=0
func=cosf op1=ffc00001 result=7fc00001 errno=0
func=cosf op1=7f800001 result=7fc00001 errno=0 status=i
func=cosf op1=ff800001 result=7fc00001 errno=0 status=i
func=cosf op1=7f800000 result=7fc00001 errno=EDOM status=i
func=cosf op1=ff800000 result=7fc00001 errno=EDOM status=i
func=cosf op1=00000000 result=3f800000 errno=0
func=cosf op1=80000000 result=3f800000 errno=0
; SDCOMP-26094: check cosf in the cases for which the range reducer
; returns values furthest beyond its nominal upper bound of pi/4.
func=cosf op1=46427f1b result=3f34dc5c.565 error=0
func=cosf op1=4647e568 result=3f34dc33.c1f error=0
func=cosf op1=46428bac result=bf34dbf2.8e3 error=0
func=cosf op1=4647f1f9 result=bf34dbc9.f9b error=0
func=cosf op1=4647fe8a result=3f34db60.313 error=0
func=cosf op1=45d8d7f1 result=bf35006a.7fd error=0
func=cosf op1=45d371a4 result=3f350056.39b error=0
func=cosf op1=45ce0b57 result=bf350041.f38 error=0
func=cosf op1=45d35882 result=bf34ffec.868 error=0
func=cosf op1=45cdf235 result=3f34ffd8.404 error=0

View file

@ -0,0 +1,17 @@
; erf.tst - Directed test cases for erf
;
; Copyright (c) 2007-2020, Arm Limited.
; SPDX-License-Identifier: MIT
func=erf op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=erf op1=fff80000.00000001 result=7ff80000.00000001 errno=0
func=erf op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=erf op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=erf op1=7ff00000.00000000 result=3ff00000.00000000 errno=0
func=erf op1=fff00000.00000000 result=bff00000.00000000 errno=0
func=erf op1=00000000.00000000 result=00000000.00000000 errno=ERANGE
func=erf op1=80000000.00000000 result=80000000.00000000 errno=ERANGE
func=erf op1=00000000.00000001 result=00000000.00000001 errno=0 status=ux
func=erf op1=80000000.00000001 result=80000000.00000001 errno=0 status=ux
func=erf op1=3ff00000.00000000 result=3feaf767.a741088a.c6d errno=0
func=erf op1=bff00000.00000000 result=bfeaf767.a741088a.c6d errno=0

View file

@ -0,0 +1,17 @@
; erff.tst - Directed test cases for erff
;
; Copyright (c) 2007-2020, Arm Limited.
; SPDX-License-Identifier: MIT
func=erff op1=7fc00001 result=7fc00001 errno=0
func=erff op1=ffc00001 result=7fc00001 errno=0
func=erff op1=7f800001 result=7fc00001 errno=0 status=i
func=erff op1=ff800001 result=7fc00001 errno=0 status=i
func=erff op1=7f800000 result=3f800000 errno=0
func=erff op1=ff800000 result=bf800000 errno=0
func=erff op1=00000000 result=00000000 errno=ERANGE
func=erff op1=80000000 result=80000000 errno=ERANGE
func=erff op1=00000001 result=00000001 errno=0 status=ux
func=erff op1=80000001 result=80000001 errno=0 status=ux
func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0
func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0

View file

@ -0,0 +1,31 @@
; Directed test cases for exp
;
; Copyright (c) 2018-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=exp op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=exp op1=fff80000.00000001 result=7ff80000.00000001 errno=0
func=exp op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=exp op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=exp op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
func=exp op1=fff00000.00000000 result=00000000.00000000 errno=0
func=exp op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
func=exp op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
func=exp op1=00000000.00000000 result=3ff00000.00000000 errno=0
func=exp op1=80000000.00000000 result=3ff00000.00000000 errno=0
func=exp op1=00000000.00000001 result=3ff00000.00000000 errno=0
func=exp op1=80000000.00000001 result=3ff00000.00000000 errno=0
func=exp op1=3c900000.00000000 result=3ff00000.00000000.400 errno=0
func=exp op1=bc900000.00000000 result=3fefffff.ffffffff.800 errno=0
func=exp op1=3fe00000.00000000 result=3ffa6129.8e1e069b.c97 errno=0
func=exp op1=bfe00000.00000000 result=3fe368b2.fc6f9609.fe8 errno=0
func=exp op1=3ff00000.00000000 result=4005bf0a.8b145769.535 errno=0
func=exp op1=bff00000.00000000 result=3fd78b56.362cef37.c6b errno=0
func=exp op1=40000000.00000000 result=401d8e64.b8d4ddad.cc3 errno=0
func=exp op1=c0000000.00000000 result=3fc152aa.a3bf81cb.9fe errno=0
func=exp op1=3ff12345.6789abcd result=40075955.c34718ed.6e3 errno=0
func=exp op1=40862e42.fefa39ef result=7fefffff.ffffff2a.1b1 errno=0
func=exp op1=40862e42.fefa39f0 result=7ff00000.00000000 errno=ERANGE status=ox
func=exp op1=c0874910.d52d3051 result=00000000.00000001 status=ux
func=exp op1=c0874910.d52d3052 result=00000000.00000000 errno=ERANGE status=ux
func=exp op1=c085d589.f2fe5107 result=00f00000.000000f1.46b errno=0

View file

@ -0,0 +1,30 @@
; Directed test cases for exp2
;
; Copyright (c) 2018-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=exp2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=exp2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
func=exp2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=exp2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=exp2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
func=exp2 op1=fff00000.00000000 result=00000000.00000000 errno=0
func=exp2 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
func=exp2 op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
func=exp2 op1=00000000.00000000 result=3ff00000.00000000 errno=0
func=exp2 op1=80000000.00000000 result=3ff00000.00000000 errno=0
func=exp2 op1=00000000.00000001 result=3ff00000.00000000 errno=0
func=exp2 op1=80000000.00000001 result=3ff00000.00000000 errno=0
func=exp2 op1=3ca00000.00000000 result=3ff00000.00000000.58c errno=0
func=exp2 op1=bc900000.00000000 result=3fefffff.ffffffff.a74 errno=0
func=exp2 op1=3fe00000.00000000 result=3ff6a09e.667f3bcc.909 errno=0
func=exp2 op1=bfe00000.00000000 result=3fe6a09e.667f3bcc.909 errno=0
func=exp2 op1=3ff00000.00000000 result=40000000.00000000 errno=0
func=exp2 op1=bff00000.00000000 result=3fe00000.00000000 errno=0
func=exp2 op1=40000000.00000000 result=40100000.00000000 errno=0
func=exp2 op1=c0000000.00000000 result=3fd00000.00000000 errno=0
func=exp2 op1=3ff12345.6789abcd result=4000cef3.c5d12321.663 errno=0
func=exp2 op1=408fffff.ffffffff result=7fefffff.fffffd3a.37a errno=0
func=exp2 op1=40900000.00000000 result=7ff00000.00000000 errno=ERANGE status=ox
func=exp2 op1=c090ca00.00000000 result=00000000.00000000.b50 status=ux
func=exp2 op1=c090cc00.00000000 result=00000000.00000000 errno=ERANGE status=ux

View file

@ -0,0 +1,25 @@
; exp2f.tst - Directed test cases for exp2f
;
; Copyright (c) 2017-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=exp2f op1=7fc00001 result=7fc00001 errno=0
func=exp2f op1=ffc00001 result=7fc00001 errno=0
func=exp2f op1=7f800001 result=7fc00001 errno=0 status=i
func=exp2f op1=ff800001 result=7fc00001 errno=0 status=i
func=exp2f op1=7f800000 result=7f800000 errno=0
func=exp2f op1=7f7fffff result=7f800000 errno=ERANGE status=ox
func=exp2f op1=ff800000 result=00000000 errno=0
func=exp2f op1=ff7fffff result=00000000 errno=ERANGE status=ux
func=exp2f op1=00000000 result=3f800000 errno=0
func=exp2f op1=80000000 result=3f800000 errno=0
func=exp2f op1=42fa0001 result=7e00002c.5c8 errno=0
func=exp2f op1=42ffffff result=7f7fffa7.470 errno=0
func=exp2f op1=43000000 result=7f800000 errno=ERANGE status=ox
func=exp2f op1=43000001 result=7f800000 errno=ERANGE status=ox
func=exp2f op1=c2fa0001 result=00ffffa7.470 errno=0
func=exp2f op1=c2fc0000 result=00800000 errno=0
func=exp2f op1=c2fc0001 result=007fffd3.a38 errno=0 status=ux
func=exp2f op1=c3150000 result=00000001 errno=0
func=exp2f op1=c3158000 result=00000000.800 errno=ERANGE status=ux
func=exp2f op1=c3165432 result=00000000.4bd errno=ERANGE status=ux

View file

@ -0,0 +1,23 @@
; expf.tst - Directed test cases for expf
;
; Copyright (c) 2007-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=expf op1=7fc00001 result=7fc00001 errno=0
func=expf op1=ffc00001 result=7fc00001 errno=0
func=expf op1=7f800001 result=7fc00001 errno=0 status=i
func=expf op1=ff800001 result=7fc00001 errno=0 status=i
func=expf op1=7f800000 result=7f800000 errno=0
func=expf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
func=expf op1=ff800000 result=00000000 errno=0
func=expf op1=ff7fffff result=00000000 errno=ERANGE status=ux
func=expf op1=00000000 result=3f800000 errno=0
func=expf op1=80000000 result=3f800000 errno=0
func=expf op1=42affff8 result=7ef87ed4.e0c errno=0
func=expf op1=42b00008 result=7ef88698.f67 errno=0
func=expf op1=42cffff8 result=7f800000 errno=ERANGE status=ox
func=expf op1=42d00008 result=7f800000 errno=ERANGE status=ox
func=expf op1=c2affff8 result=0041eecc.041 errno=0 status=ux
func=expf op1=c2b00008 result=0041ecbc.95e errno=0 status=ux
func=expf op1=c2cffff8 result=00000000 errno=ERANGE status=ux
func=expf op1=c2d00008 result=00000000 errno=ERANGE status=ux

View file

@ -0,0 +1,21 @@
; Directed test cases for log
;
; Copyright (c) 2018-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0
func=log op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=log op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=log op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
func=log op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
func=log op1=7fefffff.ffffffff result=40862e42.fefa39ef.354 errno=0
func=log op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
func=log op1=3ff00000.00000000 result=00000000.00000000 errno=0
func=log op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
func=log op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
func=log op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
func=log op1=00000000.00000001 result=c0874385.446d71c3.639 errno=0
func=log op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
func=log op1=40000000.00000000 result=3fe62e42.fefa39ef.358 errno=0
func=log op1=3fe00000.00000000 result=bfe62e42.fefa39ef.358 errno=0

View file

@ -0,0 +1,21 @@
; Directed test cases for log2
;
; Copyright (c) 2018-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0
func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0
func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0
func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0
func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0

View file

@ -0,0 +1,27 @@
; log2f.tst - Directed test cases for log2f
;
; Copyright (c) 2017-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=log2f op1=7fc00001 result=7fc00001 errno=0
func=log2f op1=ffc00001 result=7fc00001 errno=0
func=log2f op1=7f800001 result=7fc00001 errno=0 status=i
func=log2f op1=ff800001 result=7fc00001 errno=0 status=i
func=log2f op1=ff810000 result=7fc00001 errno=0 status=i
func=log2f op1=7f800000 result=7f800000 errno=0
func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i
func=log2f op1=3f800000 result=00000000 errno=0
func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z
func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z
func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i
func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0
func=log2f op1=3f604189 result=be4394c8.395 error=0
func=log2f op1=3f278034 result=bf1caa73.88e error=0
func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0
func=log2f op1=3e61259a result=c00bdb95.650 error=0
func=log2f op1=3f8147ae result=3c6b3267.d6a error=0
func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0
func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0
func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0
func=log2f op1=40070838 result=3f89e055.a0a error=0

View file

@ -0,0 +1,69 @@
; logf.tst - Directed test cases for logf
;
; Copyright (c) 2007-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=logf op1=7fc00001 result=7fc00001 errno=0
func=logf op1=ffc00001 result=7fc00001 errno=0
func=logf op1=7f800001 result=7fc00001 errno=0 status=i
func=logf op1=ff800001 result=7fc00001 errno=0 status=i
func=logf op1=ff810000 result=7fc00001 errno=0 status=i
func=logf op1=7f800000 result=7f800000 errno=0
func=logf op1=ff800000 result=7fc00001 errno=EDOM status=i
func=logf op1=3f800000 result=00000000 errno=0
func=logf op1=00000000 result=ff800000 errno=ERANGE status=z
func=logf op1=80000000 result=ff800000 errno=ERANGE status=z
func=logf op1=80000001 result=7fc00001 errno=EDOM status=i
; Directed tests for the special-case handling of log of things
; very near 1
func=logf op1=3f781e49 result=bd0016d9.4ae error=0
func=logf op1=3f78e602 result=bce675e5.f31 error=0
func=logf op1=3f844a18 result=3d07030e.ae1 error=0
func=logf op1=3f79b55b result=bccbd88a.6cb error=0
func=logf op1=3f7e2f5f result=bbe92452.74a error=0
func=logf op1=3f7f1c03 result=bb6462c1.c2c error=0
func=logf op1=3f78b213 result=bced23e2.f56 error=0
func=logf op1=3f87d5c0 result=3d735847.b7a error=0
func=logf op1=3f7fa6ad result=bab2c532.12d error=0
func=logf op1=3f87c06a result=3d70d4b6.b5e error=0
func=logf op1=3f79cf30 result=bcc88942.6e9 error=0
func=logf op1=3f794c77 result=bcd94c6f.b1e error=0
func=logf op1=3f835655 result=3cd2d8a0.0bf error=0
func=logf op1=3f81b5c0 result=3c596d08.520 error=0
func=logf op1=3f805e2f result=3b3c18d4.d2b error=0
func=logf op1=3f7aa609 result=bcad0f90.fdb error=0
func=logf op1=3f7a9091 result=bcafcd59.f83 error=0
func=logf op1=3f7a7475 result=bcb36490.a0f error=0
func=logf op1=3f823417 result=3c8bd287.fa6 error=0
func=logf op1=3f7fbcc3 result=ba868bac.14c error=0
func=logf op1=3f805fc9 result=3b3f4a76.169 error=0
func=logf op1=3f833d43 result=3cccbc4f.cb7 error=0
func=logf op1=3f7cb1de result=bc54e91e.6b5 error=0
func=logf op1=3f7f2793 result=bb58c8af.bfc error=0
func=logf op1=3f7bb8c3 result=bc8a0fc9.93c error=0
func=logf op1=3f81d349 result=3c67fe09.42e error=0
func=logf op1=3f7c254d result=bc788cf4.610 error=0
func=logf op1=3f7f789d result=bb0786d9.6c6 error=0
func=logf op1=3f7ed1f2 result=bb97605f.963 error=0
func=logf op1=3f826067 result=3c96b4af.5e1 error=0
func=logf op1=3f821a68 result=3c8581f9.dac error=0
func=logf op1=3f864e1a result=3d44f368.e66 error=0
func=logf op1=3f7fea3d result=b9ae1f66.b58 error=0
func=logf op1=3f7cf4f5 result=bc43ed76.1c5 error=0
func=logf op1=3f84c223 result=3d15814e.36d error=0
func=logf op1=3f7dae6d result=bc1511d5.0aa error=0
func=logf op1=3f7c0a3c result=bc7f6c0d.758 error=0
func=logf op1=3f858b22 result=3d2da861.f36 error=0
func=logf op1=3f85d7c7 result=3d36d490.ee9 error=0
func=logf op1=3f7f2109 result=bb5f5851.2ed error=0
func=logf op1=3f83809c result=3cdd23f7.6b1 error=0
func=logf op1=3f83d96e result=3cf2b9c8.0b1 error=0
func=logf op1=3f86ca84 result=3d53bee8.53f error=0
func=logf op1=3f83548e result=3cd269c3.39d error=0
func=logf op1=3f7c199c result=bc7b84b6.0da error=0
func=logf op1=3f83133f result=3cc27c0a.9dd error=0
func=logf op1=3f7c97b4 result=bc5b89dd.399 error=0
func=logf op1=3f810bc1 result=3c05553c.011 error=0
func=logf op1=3f7dadb8 result=bc153f7e.fbb error=0
func=logf op1=3f87be56 result=3d709602.538 error=0

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,246 @@
; powf.tst - Directed test cases for powf
;
; Copyright (c) 2007-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=powf op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=7fc00001 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=ffc00001 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=7f800000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=40800000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=40400000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=3f000000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=00000000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=80000000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=bf000000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=c0400000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=c0800000 result=7fc00001 errno=0 status=i
func=powf op1=7f800001 op2=ff800000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=7fc00001 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=ffc00001 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=7f800000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=40800000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=40400000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=3f000000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=00000000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=80000000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=bf000000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=c0400000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=c0800000 result=7fc00001 errno=0 status=i
func=powf op1=ff800001 op2=ff800000 result=7fc00001 errno=0 status=i
func=powf op1=7fc00001 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=7fc00001 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=7fc00001 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=7f800000 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=40800000 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=40400000 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=3f000000 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=00000000 result=3f800000 errno=0
func=powf op1=7fc00001 op2=80000000 result=3f800000 errno=0
func=powf op1=7fc00001 op2=bf000000 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=c0400000 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=c0800000 result=7fc00001 errno=0
func=powf op1=7fc00001 op2=ff800000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=ffc00001 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=ffc00001 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=7f800000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=40800000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=40400000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=3f000000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=00000000 result=3f800000 errno=0
func=powf op1=ffc00001 op2=80000000 result=3f800000 errno=0
func=powf op1=ffc00001 op2=bf000000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=c0400000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=c0800000 result=7fc00001 errno=0
func=powf op1=ffc00001 op2=ff800000 result=7fc00001 errno=0
func=powf op1=7f800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=7f800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=7f800000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=7f800000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=7f800000 op2=7f800000 result=7f800000 errno=0
func=powf op1=7f800000 op2=40800000 result=7f800000 errno=0
func=powf op1=7f800000 op2=40400000 result=7f800000 errno=0
func=powf op1=7f800000 op2=3f000000 result=7f800000 errno=0
func=powf op1=7f800000 op2=00000001 result=7f800000 errno=0
func=powf op1=7f800000 op2=00000000 result=3f800000 errno=0
func=powf op1=7f800000 op2=80000000 result=3f800000 errno=0
func=powf op1=7f800000 op2=bf000000 result=00000000 errno=0
func=powf op1=7f800000 op2=c0400000 result=00000000 errno=0
func=powf op1=7f800000 op2=c0800000 result=00000000 errno=0
func=powf op1=7f800000 op2=ff800000 result=00000000 errno=0
func=powf op1=40800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=40800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=40800000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=40800000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=40800000 op2=7f800000 result=7f800000 errno=0
func=powf op1=40800000 op2=40800000 result=43800000 errno=0
func=powf op1=40800000 op2=40400000 result=42800000 errno=0
func=powf op1=40800000 op2=3f000000 result=40000000 errno=0
func=powf op1=40800000 op2=00000000 result=3f800000 errno=0
func=powf op1=40800000 op2=80000000 result=3f800000 errno=0
func=powf op1=40800000 op2=bf000000 result=3f000000 errno=0
func=powf op1=40800000 op2=c0400000 result=3c800000 errno=0
func=powf op1=40800000 op2=c0800000 result=3b800000 errno=0
func=powf op1=40800000 op2=ff800000 result=00000000 errno=0
func=powf op1=3f800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=3f800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=3f800000 op2=7fc00001 result=3f800000 errno=0
func=powf op1=3f800000 op2=ffc00001 result=3f800000 errno=0
func=powf op1=3f800000 op2=7f800000 result=3f800000 errno=0
func=powf op1=3f800000 op2=40800000 result=3f800000 errno=0
func=powf op1=3f800000 op2=40400000 result=3f800000 errno=0
func=powf op1=3f800000 op2=3f000000 result=3f800000 errno=0
func=powf op1=3f800000 op2=00000000 result=3f800000 errno=0
func=powf op1=3f800000 op2=80000000 result=3f800000 errno=0
func=powf op1=3f800000 op2=bf000000 result=3f800000 errno=0
func=powf op1=3f800000 op2=c0400000 result=3f800000 errno=0
func=powf op1=3f800000 op2=c0800000 result=3f800000 errno=0
func=powf op1=3f800000 op2=ff800000 result=3f800000 errno=0
func=powf op1=3e800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=3e800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=3e800000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=3e800000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=3e800000 op2=7f800000 result=00000000 errno=0
func=powf op1=3e800000 op2=40800000 result=3b800000 errno=0
func=powf op1=3e800000 op2=40400000 result=3c800000 errno=0
func=powf op1=3e800000 op2=3f000000 result=3f000000 errno=0
func=powf op1=3e800000 op2=00000000 result=3f800000 errno=0
func=powf op1=3e800000 op2=80000000 result=3f800000 errno=0
func=powf op1=3e800000 op2=bf000000 result=40000000 errno=0
func=powf op1=3e800000 op2=c0400000 result=42800000 errno=0
func=powf op1=3e800000 op2=c0800000 result=43800000 errno=0
func=powf op1=3e800000 op2=ff800000 result=7f800000 errno=0
func=powf op1=00000001 op2=bf800000 result=7f800000 errno=ERANGE status=ox
func=powf op1=00000000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=00000000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=00000000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=00000000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=00000000 op2=7f800000 result=00000000 errno=0
func=powf op1=00000000 op2=40800000 result=00000000 errno=0
func=powf op1=00000000 op2=40400000 result=00000000 errno=0
func=powf op1=00000000 op2=3f000000 result=00000000 errno=0
func=powf op1=00000000 op2=00000000 result=3f800000 errno=0
func=powf op1=00000000 op2=80000000 result=3f800000 errno=0
func=powf op1=00000000 op2=bf000000 result=7f800000 errno=ERANGE status=z
func=powf op1=00000000 op2=c0400000 result=7f800000 errno=ERANGE status=z
func=powf op1=00000000 op2=c0800000 result=7f800000 errno=ERANGE status=z
func=powf op1=00000000 op2=ff800000 result=7f800000 errno=ERANGE
func=powf op1=80000000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=80000000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=80000000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=80000000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=80000000 op2=7f800000 result=00000000 errno=0
func=powf op1=80000000 op2=40800000 result=00000000 errno=0
func=powf op1=80000000 op2=40400000 result=80000000 errno=0
func=powf op1=80000000 op2=3f000000 result=00000000 errno=0
func=powf op1=80000000 op2=00000000 result=3f800000 errno=0
func=powf op1=80000000 op2=80000000 result=3f800000 errno=0
func=powf op1=80000000 op2=bf000000 result=7f800000 errno=ERANGE status=z
func=powf op1=80000000 op2=c0400000 result=ff800000 errno=ERANGE status=z
func=powf op1=80000000 op2=c0800000 result=7f800000 errno=ERANGE status=z
func=powf op1=80000000 op2=ff800000 result=7f800000 errno=ERANGE
func=powf op1=be800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=be800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=be800000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=be800000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=be800000 op2=7f800000 result=00000000 errno=0
func=powf op1=be800000 op2=40800000 result=3b800000 errno=0
func=powf op1=be800000 op2=40400000 result=bc800000 errno=0
func=powf op1=be800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
func=powf op1=be800000 op2=00000000 result=3f800000 errno=0
func=powf op1=be800000 op2=80000000 result=3f800000 errno=0
func=powf op1=be800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
func=powf op1=be800000 op2=c0400000 result=c2800000 errno=0
func=powf op1=be800000 op2=c0800000 result=43800000 errno=0
func=powf op1=be800000 op2=ff800000 result=7f800000 errno=0
func=powf op1=bf800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=bf800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=bf800000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=bf800000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=bf800000 op2=7f800000 result=3f800000 errno=0
func=powf op1=bf800000 op2=40800000 result=3f800000 errno=0
func=powf op1=bf800000 op2=40400000 result=bf800000 errno=0
func=powf op1=bf800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
func=powf op1=bf800000 op2=00000000 result=3f800000 errno=0
func=powf op1=bf800000 op2=80000000 result=3f800000 errno=0
func=powf op1=bf800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
func=powf op1=bf800000 op2=c0400000 result=bf800000 errno=0
func=powf op1=bf800000 op2=c0800000 result=3f800000 errno=0
func=powf op1=bf800000 op2=ff800000 result=3f800000 errno=0
func=powf op1=c0800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=c0800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=c0800000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=c0800000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=c0800000 op2=7f800000 result=7f800000 errno=0
func=powf op1=c0800000 op2=40800000 result=43800000 errno=0
func=powf op1=c0800000 op2=40400000 result=c2800000 errno=0
func=powf op1=c0800000 op2=3f000000 result=7fc00001 errno=EDOM status=i
func=powf op1=c0800000 op2=00000000 result=3f800000 errno=0
func=powf op1=c0800000 op2=80000000 result=3f800000 errno=0
func=powf op1=c0800000 op2=bf000000 result=7fc00001 errno=EDOM status=i
func=powf op1=c0800000 op2=c0400000 result=bc800000 errno=0
func=powf op1=c0800000 op2=c0800000 result=3b800000 errno=0
func=powf op1=c0800000 op2=ff800000 result=00000000 errno=0
func=powf op1=ff800000 op2=7f800001 result=7fc00001 errno=0 status=i
func=powf op1=ff800000 op2=ff800001 result=7fc00001 errno=0 status=i
func=powf op1=ff800000 op2=7fc00001 result=7fc00001 errno=0
func=powf op1=ff800000 op2=ffc00001 result=7fc00001 errno=0
func=powf op1=ff800000 op2=7f800000 result=7f800000 errno=0
func=powf op1=ff800000 op2=40800000 result=7f800000 errno=0
func=powf op1=ff800000 op2=40400000 result=ff800000 errno=0
func=powf op1=ff800000 op2=3f000000 result=7f800000 errno=0
func=powf op1=ff800000 op2=00000000 result=3f800000 errno=0
func=powf op1=ff800000 op2=80000000 result=3f800000 errno=0
func=powf op1=ff800000 op2=bf000000 result=00000000 errno=0
func=powf op1=ff800000 op2=c0400000 result=80000000 errno=0
func=powf op1=ff800000 op2=c0800000 result=00000000 errno=0
func=powf op1=ff800000 op2=ff800000 result=00000000 errno=0
func=powf op1=36c27f9d op2=4109fa51 result=00000000 errno=ERANGE status=ux
func=powf op1=351738cd op2=c0c55691 result=7f800000 errno=ERANGE status=ox
func=powf op1=42836035 op2=41a99f40 result=7f800000 errno=ERANGE status=ox
func=powf op1=32bd53f3 op2=40bcba58 result=00000000 errno=ERANGE status=ux
func=powf op1=32dc5bff op2=40be62ea result=00000000 errno=ERANGE status=ux
func=powf op1=3a8a3f66 op2=4172bd43 result=00000000 errno=ERANGE status=ux
func=powf op1=28f0e770 op2=c035b4ca result=7f800000 errno=ERANGE status=ox
func=powf op1=40886699 op2=c28f703a result=00000000 errno=ERANGE status=ux
func=powf op1=414bd593 op2=c22370cf result=00000000 errno=ERANGE status=ux
func=powf op1=3a2f1163 op2=c1422d45 result=7f800000 errno=ERANGE status=ox
func=powf op1=434f5cf3 op2=41851272 result=7f800000 errno=ERANGE status=ox
func=powf op1=2e0e27a4 op2=c06b13f5 result=7f800000 errno=ERANGE status=ox
func=powf op1=39aef7a6 op2=414fd60a result=00000000 errno=ERANGE status=ux
func=powf op1=21c80729 op2=c00a04ab result=7f800000 errno=ERANGE status=ox
func=powf op1=42455a4b op2=c1d55905 result=00000000 errno=ERANGE status=ux
func=powf op1=2d173e0b op2=c05ee797 result=7f800000 errno=ERANGE status=ox
func=powf op1=452edf9a op2=4132dd7f result=7f800000 errno=ERANGE status=ox
func=powf op1=406bf67b op2=c29f5f12 result=00000000 errno=ERANGE status=ux
func=powf op1=2d82a6fc op2=4085779e result=00000000 errno=ERANGE status=ux
func=powf op1=4551f827 op2=41304516 result=7f800000 errno=ERANGE status=ox
func=powf op1=3a917c51 op2=41726c0a result=00000001.37f errno=0 status=ux
; ISO C allows both errno=ERANGE and errno=0
;func=powf op1=3b19bbaa op2=4188e6fb result=00000000.b5f errno=0 status=ux
;func=powf op1=4088bd18 op2=c28ef056 result=00000000.986 errno=0 status=ux
func=powf op1=3f7ffd76 op2=4a09221e result=00aa9d24.3ad error=0
func=powf op1=007fffff op2=bf000001 result=5f00002c.2b2 error=0
func=powf op1=000007ff op2=bf000001 result=62000830.96f error=0
func=powf op1=007fffff op2=80800001 result=3f800000.000 error=0
func=powf op1=00000000 op2=800007ff result=7f800000 errno=ERANGE status=z
func=powf op1=00000000 op2=000007ff result=00000000 error=0
func=powf op1=bf800000 op2=ff7fffff result=3f800000 error=0
func=powf op1=2e4e4f30 op2=406b0dc2 result=007e9c59.eb4 errno=0 status=u
; SDCOMP-25549: ensure the biggest overflow case possible is not
; mishandled. Also check the analogous underflow, and also ensure that
; our massive-overflow checks do not affect numbers _just within_ the
; range.
func=powf op1=7f7fffff op2=7f7fffff result=7f800000 error=overflow
func=powf op1=7f7fffff op2=ff7fffff result=00000000 error=underflow
func=powf op1=54cb3000 op2=403fffff result=7f7fffb2.a95 error=0

View file

@ -0,0 +1,51 @@
; Directed test cases for SP sincos
;
; Copyright (c) 2007-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=sincosf_sinf op1=7fc00001 result=7fc00001 errno=0
func=sincosf_sinf op1=ffc00001 result=7fc00001 errno=0
func=sincosf_sinf op1=7f800001 result=7fc00001 errno=0 status=i
func=sincosf_sinf op1=ff800001 result=7fc00001 errno=0 status=i
func=sincosf_sinf op1=7f800000 result=7fc00001 errno=EDOM status=i
func=sincosf_sinf op1=ff800000 result=7fc00001 errno=EDOM status=i
func=sincosf_sinf op1=00000000 result=00000000 errno=0
func=sincosf_sinf op1=80000000 result=80000000 errno=0
func=sincosf_sinf op1=c70d39a1 result=be37fad5.7ed errno=0
func=sincosf_sinf op1=46427f1b result=3f352d80.f9b error=0
func=sincosf_sinf op1=4647e568 result=3f352da9.7be error=0
func=sincosf_sinf op1=46428bac result=bf352dea.924 error=0
func=sincosf_sinf op1=4647f1f9 result=bf352e13.146 error=0
func=sincosf_sinf op1=4647fe8a result=3f352e7c.ac9 error=0
func=sincosf_sinf op1=45d8d7f1 result=3f35097b.cb0 error=0
func=sincosf_sinf op1=45d371a4 result=bf350990.102 error=0
func=sincosf_sinf op1=45ce0b57 result=3f3509a4.554 error=0
func=sincosf_sinf op1=45d35882 result=3f3509f9.bdb error=0
func=sincosf_sinf op1=45cdf235 result=bf350a0e.02c error=0
func=sincosf_cosf op1=7fc00001 result=7fc00001 errno=0
func=sincosf_cosf op1=ffc00001 result=7fc00001 errno=0
func=sincosf_cosf op1=7f800001 result=7fc00001 errno=0 status=i
func=sincosf_cosf op1=ff800001 result=7fc00001 errno=0 status=i
func=sincosf_cosf op1=7f800000 result=7fc00001 errno=EDOM status=i
func=sincosf_cosf op1=ff800000 result=7fc00001 errno=EDOM status=i
func=sincosf_cosf op1=00000000 result=3f800000 errno=0
func=sincosf_cosf op1=80000000 result=3f800000 errno=0
func=sincosf_cosf op1=46427f1b result=3f34dc5c.565 error=0
func=sincosf_cosf op1=4647e568 result=3f34dc33.c1f error=0
func=sincosf_cosf op1=46428bac result=bf34dbf2.8e3 error=0
func=sincosf_cosf op1=4647f1f9 result=bf34dbc9.f9b error=0
func=sincosf_cosf op1=4647fe8a result=3f34db60.313 error=0
func=sincosf_cosf op1=45d8d7f1 result=bf35006a.7fd error=0
func=sincosf_cosf op1=45d371a4 result=3f350056.39b error=0
func=sincosf_cosf op1=45ce0b57 result=bf350041.f38 error=0
func=sincosf_cosf op1=45d35882 result=bf34ffec.868 error=0
func=sincosf_cosf op1=45cdf235 result=3f34ffd8.404 error=0
; no underflow
func=sincosf_sinf op1=17800000 result=17800000.000
func=sincosf_cosf op1=17800000 result=3f800000.000
; underflow
func=sincosf_sinf op1=00400000 result=00400000.000 status=ux
func=sincosf_cosf op1=00400000 result=3f800000.000 status=ux

View file

@ -0,0 +1,28 @@
; sinf.tst - Directed test cases for SP sine
;
; Copyright (c) 2007-2019, Arm Limited.
; SPDX-License-Identifier: MIT
func=sinf op1=7fc00001 result=7fc00001 errno=0
func=sinf op1=ffc00001 result=7fc00001 errno=0
func=sinf op1=7f800001 result=7fc00001 errno=0 status=i
func=sinf op1=ff800001 result=7fc00001 errno=0 status=i
func=sinf op1=7f800000 result=7fc00001 errno=EDOM status=i
func=sinf op1=ff800000 result=7fc00001 errno=EDOM status=i
func=sinf op1=00000000 result=00000000 errno=0
func=sinf op1=80000000 result=80000000 errno=0
; Directed test for a failure I found while developing mathbench
func=sinf op1=c70d39a1 result=be37fad5.7ed errno=0
; SDCOMP-26094: check sinf in the cases for which the range reducer
; returns values furthest beyond its nominal upper bound of pi/4.
func=sinf op1=46427f1b result=3f352d80.f9b error=0
func=sinf op1=4647e568 result=3f352da9.7be error=0
func=sinf op1=46428bac result=bf352dea.924 error=0
func=sinf op1=4647f1f9 result=bf352e13.146 error=0
func=sinf op1=4647fe8a result=3f352e7c.ac9 error=0
func=sinf op1=45d8d7f1 result=3f35097b.cb0 error=0
func=sinf op1=45d371a4 result=bf350990.102 error=0
func=sinf op1=45ce0b57 result=3f3509a4.554 error=0
func=sinf op1=45d35882 result=3f3509f9.bdb error=0
func=sinf op1=45cdf235 result=bf350a0e.02c error=0

View file

@ -0,0 +1,10 @@
!! double.tst - Random test case specification for DP functions
!!
!! Copyright (c) 1999-2019, Arm Limited.
!! SPDX-License-Identifier: MIT
test exp 10000
test exp2 10000
test log 10000
test log2 10000
test pow 40000

View file

@ -0,0 +1,15 @@
!! single.tst - Random test case specification for SP functions
!!
!! Copyright (c) 1999-2019, Arm Limited.
!! SPDX-License-Identifier: MIT
test sinf 10000
test cosf 10000
test sincosf_sinf 5000
test sincosf_cosf 5000
test tanf 10000
test expf 10000
test exp2f 10000
test logf 10000
test log2f 10000
test powf 10000

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,853 @@
/*
* ULP error checking tool for math functions.
*
* Copyright (c) 2019-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <ctype.h>
#include <fenv.h>
#include <float.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mathlib.h"
/* Don't depend on mpfr by default. */
#ifndef USE_MPFR
# define USE_MPFR 0
#endif
#if USE_MPFR
# include <mpfr.h>
#endif
#ifndef WANT_VMATH
/* Enable the build of vector math code. */
# define WANT_VMATH 1
#endif
/* Return the 64-bit object representation of the double F, unchanged.  */
static inline uint64_t
asuint64 (double f)
{
  uint64_t bits;
  memcpy (&bits, &f, sizeof bits);
  return bits;
}
/* Return the double whose object representation is the 64-bit pattern I.  */
static inline double
asdouble (uint64_t i)
{
  double value;
  memcpy (&value, &i, sizeof value);
  return value;
}
/* Return the 32-bit object representation of the float F, unchanged.  */
static inline uint32_t
asuint (float f)
{
  uint32_t bits;
  memcpy (&bits, &f, sizeof bits);
  return bits;
}
/* Return the float whose object representation is the 32-bit pattern I.  */
static inline float
asfloat (uint32_t i)
{
  float value;
  memcpy (&value, &i, sizeof value);
  return value;
}
/* State of the pseudo-random generator; every rand64 call replaces it.  */
static uint64_t seed = 0x0123456789abcdef;

/* Step the 64-bit linear congruential generator once and return the new
   state with its high half folded (xor) into its low half.  */
static uint64_t
rand64 (void)
{
  const uint64_t state = seed * 6364136223846793005ull + 1;
  seed = state;
  return state ^ (state >> 32);
}
/* Return a uniformly distributed random integer in the closed range [0,n].

   Uses rejection sampling on rand64: a draw is accepted only when the whole
   block of n+1 consecutive values it falls in fits below 2^64, so every
   residue is equally likely.  */
static uint64_t
randn (uint64_t n)
{
  if (n == 0)
    return 0;

  /* Number of distinct outcomes; wraps to 0 when n is UINT64_MAX, in which
     case every 64-bit value is a valid outcome.  */
  const uint64_t count = n + 1;
  if (count == 0)
    return rand64 ();

  /* 2^64 - count, computed in modular arithmetic.  */
  const uint64_t limit = -count;
  for (;;)
    {
      const uint64_t draw = rand64 ();
      const uint64_t rem = draw % count;
      /* draw - rem is the start of the block containing draw.  */
      if (draw - rem <= limit)
	return rem;
    }
}
/* Description of the input space sampled by next1/next2.  The values are
bit patterns (the callers convert them with asfloat/asdouble), not
floating-point numbers.  */
struct gen
{
uint64_t start; /* Bit pattern at the low end of the first argument's range.  */
uint64_t len; /* Width of that range: printgen_* shows it as [start; start + len].  */
uint64_t start2; /* Low end of the second argument's range (used by next2).  */
uint64_t len2; /* Width of the second argument's range.  */
uint64_t off; /* Offset from start of the next sample returned by next1.  */
uint64_t step; /* Base increment of off; next1 adds up to step/2 of random jitter.  */
uint64_t cnt; /* Not used in the code visible here; presumably a sample count -- TODO confirm.  */
};
/* Argument packs passed by value to the call_*, call_long_* and printcall_*
helpers: f = float, d = double, and the digit is the number of arguments.  */

/* One float argument.  */
struct args_f1
{
float x;
};
/* Two float arguments; x is passed first, x2 second.  */
struct args_f2
{
float x;
float x2;
};
/* One double argument.  */
struct args_d1
{
double x;
};
/* Two double arguments; x is passed first, x2 second.  */
struct args_d2
{
double x;
double x2;
};
/* Single-precision result record: result = y + tail*2^ulpexp.  */
struct ret_f
{
float y; /* Leading float part of the result.  */
double tail; /* Remainder, scaled by 2^ulpexp as in the formula above.  */
int ulpexp; /* Binary exponent applied to tail.  */
int ex; /* Not used in the code visible here; presumably exception flags that were/must be raised -- TODO confirm.  */
int ex_may; /* Not used in the code visible here; presumably exception flags that may additionally be raised -- TODO confirm.  */
};
/* Double-precision result record; same field layout as struct ret_f except
that y is a double.  NOTE(review): presumably the same
result = y + tail*2^ulpexp convention applies -- confirm against the users.  */
struct ret_d
{
double y; /* Leading double part of the result.  */
double tail; /* Remainder term; see the note above.  */
int ulpexp; /* Binary exponent associated with tail.  */
int ex; /* Not used in the code visible here; presumably exception flags -- TODO confirm.  */
int ex_may; /* Not used in the code visible here; presumably optional exception flags -- TODO confirm.  */
};
/* Produce the next bit pattern for a single-argument function.

   Samples advance through [start, start + len] in randomized increments of
   between step and step + step/2, which gives dense coverage without
   repeating inputs and makes it possible to visit a whole range.  */
static inline uint64_t
next1 (struct gen *g)
{
  const uint64_t sample = g->start + g->off;
  const uint64_t advance = g->step + randn (g->step / 2);

  g->off += advance;
  /* Crude wrap-around (a single subtraction, not a modulo) once the offset
     has run past the end of the range.  */
  if (g->off > g->len)
    g->off -= g->len;
  return sample;
}
/* Produce the next pair of bit patterns for a two-argument function using
   independent uniform draws.  Returns the first argument and stores the
   second through X2.  */
static inline uint64_t
next2 (uint64_t *x2, struct gen *g)
{
  /* The first argument is drawn before the second; keep this order so the
     random stream is consumed identically.  */
  const uint64_t first = g->start + randn (g->len);
  const uint64_t second = g->start2 + randn (g->len2);

  *x2 = second;
  return first;
}
static struct args_f1
next_f1 (void *g)
{
return (struct args_f1){asfloat (next1 (g))};
}
static struct args_f2
next_f2 (void *g)
{
uint64_t x2;
uint64_t x = next2 (&x2, g);
return (struct args_f2){asfloat (x), asfloat (x2)};
}
static struct args_d1
next_d1 (void *g)
{
return (struct args_d1){asdouble (next1 (g))};
}
static struct args_d2
next_d2 (void *g)
{
uint64_t x2;
uint64_t x = next2 (&x2, g);
return (struct args_d2){asdouble (x), asdouble (x2)};
}
/* Run-time configuration of the tool.  NOTE(review): the code that fills in
and reads these fields lies outside this excerpt, so the per-field notes
below are unconfirmed readings of the names -- verify before relying on
them.  */
struct conf
{
int r; /* Presumably the rounding mode (an FE_* value, cf. rmap) -- TODO confirm.  */
int rc;
int quiet;
int mpfr; /* Presumably whether the MPFR reference is used (cf. USE_MPFR) -- TODO confirm.  */
int fenv; /* Presumably whether floating-point exception flags are checked -- TODO confirm.  */
unsigned long long n;
double softlim;
double errlim;
};
/* Wrappers for sincos. */
static float sincosf_sinf(float x) {(void)cosf(x); return sinf(x);}
static float sincosf_cosf(float x) {(void)sinf(x); return cosf(x);}
static double sincos_sin(double x) {(void)cos(x); return sin(x);}
static double sincos_cos(double x) {(void)sin(x); return cos(x);}
#if USE_MPFR
/* MPFR counterparts of the sincos wrappers: compute the unwanted function
   into Y first, then overwrite Y with the wanted one and return its MPFR
   status value.  */
static int
sincos_mpfr_sin (mpfr_t y, const mpfr_t x, mpfr_rnd_t r)
{
  (void) mpfr_cos (y, x, r);
  return mpfr_sin (y, x, r);
}

static int
sincos_mpfr_cos (mpfr_t y, const mpfr_t x, mpfr_rnd_t r)
{
  (void) mpfr_sin (y, x, r);
  return mpfr_cos (y, x, r);
}
#endif
/* A bit of a hack: call vector functions twice with the same
input in lane 0 but a different value in other lanes: once
with an in-range value and then with a special case value. */
/* secondcall is used by argf/argd as an index into the two-element arrays
fv[] and dv[], so it must be 0 (filler is 1.0) or 1 (filler is -INFINITY).
Only the last vector lane receives the filler; the remaining lanes carry the
input itself.  The code that sets this flag is outside this excerpt.  */
static int secondcall;
/* Wrappers for vector functions.  Every wrapper packs its scalar argument(s)
   into a vector with argf/argd, calls the vector routine and returns lane 0
   of the result, so that vector code can be driven through the scalar
   function-pointer types of struct fun.  */
#if __aarch64__ && WANT_VMATH
typedef __f32x4_t v_float;
typedef __f64x2_t v_double;

/* Value placed in the last lane, selected by secondcall.  */
static const float fv[2] = {1.0f, -INFINITY};
static const double dv[2] = {1.0, -INFINITY};

/* Build a 4 x float vector: x in lanes 0-2, filler in lane 3.  */
static inline v_float
argf (float x)
{
  return (v_float){x, x, x, fv[secondcall]};
}

/* Build a 2 x double vector: x in lane 0, filler in lane 1.  */
static inline v_double
argd (double x)
{
  return (v_double){x, dv[secondcall]};
}

/* Generators for the four wrapper shapes (float/double, one/two args).  */
#define VWRAP_F1(wrapper, vfn) \
  static float wrapper (float x) { return vfn (argf (x))[0]; }
#define VWRAP_F2(wrapper, vfn) \
  static float wrapper (float x, float y) { return vfn (argf (x), argf (y))[0]; }
#define VWRAP_D1(wrapper, vfn) \
  static double wrapper (double x) { return vfn (argd (x))[0]; }
#define VWRAP_D2(wrapper, vfn) \
  static double wrapper (double x, double y) { return vfn (argd (x), argd (y))[0]; }

/* __v_* entry points.  */
VWRAP_F1 (v_sinf, __v_sinf)
VWRAP_F1 (v_cosf, __v_cosf)
VWRAP_F1 (v_expf_1u, __v_expf_1u)
VWRAP_F1 (v_expf, __v_expf)
VWRAP_F1 (v_exp2f_1u, __v_exp2f_1u)
VWRAP_F1 (v_exp2f, __v_exp2f)
VWRAP_F1 (v_logf, __v_logf)
VWRAP_F2 (v_powf, __v_powf)
VWRAP_D1 (v_sin, __v_sin)
VWRAP_D1 (v_cos, __v_cos)
VWRAP_D1 (v_exp, __v_exp)
VWRAP_D1 (v_log, __v_log)
VWRAP_D2 (v_pow, __v_pow)
#ifdef __vpcs
/* __vn_* entry points.  */
VWRAP_F1 (vn_sinf, __vn_sinf)
VWRAP_F1 (vn_cosf, __vn_cosf)
VWRAP_F1 (vn_expf_1u, __vn_expf_1u)
VWRAP_F1 (vn_expf, __vn_expf)
VWRAP_F1 (vn_exp2f_1u, __vn_exp2f_1u)
VWRAP_F1 (vn_exp2f, __vn_exp2f)
VWRAP_F1 (vn_logf, __vn_logf)
VWRAP_F2 (vn_powf, __vn_powf)
VWRAP_D1 (vn_sin, __vn_sin)
VWRAP_D1 (vn_cos, __vn_cos)
VWRAP_D1 (vn_exp, __vn_exp)
VWRAP_D1 (vn_log, __vn_log)
VWRAP_D2 (vn_pow, __vn_pow)
/* _ZGVnN* entry points.  */
VWRAP_F1 (Z_sinf, _ZGVnN4v_sinf)
VWRAP_F1 (Z_cosf, _ZGVnN4v_cosf)
VWRAP_F1 (Z_expf, _ZGVnN4v_expf)
VWRAP_F1 (Z_exp2f, _ZGVnN4v_exp2f)
VWRAP_F1 (Z_logf, _ZGVnN4v_logf)
VWRAP_F2 (Z_powf, _ZGVnN4vv_powf)
VWRAP_D1 (Z_sin, _ZGVnN2v_sin)
VWRAP_D1 (Z_cos, _ZGVnN2v_cos)
VWRAP_D1 (Z_exp, _ZGVnN2v_exp)
VWRAP_D1 (Z_log, _ZGVnN2v_log)
VWRAP_D2 (Z_pow, _ZGVnN2vv_pow)
#endif
#undef VWRAP_F1
#undef VWRAP_F2
#undef VWRAP_D1
#undef VWRAP_D2
#endif
/* Descriptor of one testable function: its printable name, its shape, and
the implementations to call for it.  Only the union member matching the
entry's arity and precision (f1, f2, d1 or d2) is initialized by the table
that follows this definition.  */
struct fun
{
const char *name; /* Name printed by the printcall_* and printgen_* helpers.  */
int arity; /* Number of arguments; the table uses 1 or 2.  */
int singleprec; /* 1 for the float entries of the table, 0 for the double ones.  */
int twice; /* 1 for the vector-wrapper entries; presumably requests the second call described at secondcall -- TODO confirm.  */
/* Function under test, called through the call_* helpers.  */
union
{
float (*f1) (float);
float (*f2) (float, float);
double (*d1) (double);
double (*d2) (double, double);
} fun;
/* Counterpart evaluated in the next wider type (double for float entries,
long double for double entries), called through the call_long_* helpers.  */
union
{
double (*f1) (double);
double (*f2) (double, double);
long double (*d1) (long double);
long double (*d2) (long double, long double);
} fun_long;
#if USE_MPFR
/* MPFR counterpart; only present when the tool is built with USE_MPFR.  */
union
{
int (*f1) (mpfr_t, const mpfr_t, mpfr_rnd_t);
int (*f2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t);
int (*d1) (mpfr_t, const mpfr_t, mpfr_rnd_t);
int (*d2) (mpfr_t, const mpfr_t, const mpfr_t, mpfr_rnd_t);
} fun_mpfr;
#endif
};
/* Table of all testable functions, terminated by an all-zero entry.

F (x, x_wrap, x_long, x_mpfr, a, s, t, twice) emits one entry:
x        - stringized to give the entry's name;
x_wrap   - the function under test (or a scalar wrapper around it);
x_long   - the counterpart stored in fun_long;
x_mpfr   - the MPFR counterpart (dropped when USE_MPFR is 0);
a, s     - the arity and singleprec fields;
t        - the union member (f1, f2, d1 or d2) that gets initialized;
twice    - the twice field.

F1/F2 describe one- and two-argument float functions named x##f checked
against the double function x; D1/D2 describe double functions x checked
against the long double function x##l.  */
static const struct fun fun[] = {
#if USE_MPFR
# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
{#x, a, s, twice, {.t = x_wrap}, {.t = x_long}, {.t = x_mpfr}},
#else
# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
{#x, a, s, twice, {.t = x_wrap}, {.t = x_long}},
#endif
#define F1(x) F (x##f, x##f, x, mpfr_##x, 1, 1, f1, 0)
#define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0)
#define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0)
#define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0)
/* Scalar functions.  */
F1 (sin)
F1 (cos)
F (sincosf_sinf, sincosf_sinf, sincos_sin, sincos_mpfr_sin, 1, 1, f1, 0)
F (sincosf_cosf, sincosf_cosf, sincos_cos, sincos_mpfr_cos, 1, 1, f1, 0)
F1 (exp)
F1 (exp2)
F1 (log)
F1 (log2)
F2 (pow)
F1 (erf)
D1 (exp)
D1 (exp2)
D1 (log)
D1 (log2)
D2 (pow)
D1 (erf)
#if WANT_VMATH
/* __s_* entry points, called directly with scalar arguments.  */
F (__s_sinf, __s_sinf, sin, mpfr_sin, 1, 1, f1, 0)
F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0)
F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0)
F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0)
F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0)
F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0)
F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0)
F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0)
F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0)
F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
#if __aarch64__
/* Vector entry points, reached through the v_* scalar wrappers; these are
the entries that set twice to 1.  */
F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1)
F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1)
F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1)
F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1)
F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
#ifdef __vpcs
/* Entry points that are only wrapped when __vpcs is defined.  */
F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1)
F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1)
F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1)
F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1)
F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1)
F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1)
F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1)
F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1)
F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1)
F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1)
F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
#endif
#endif
#endif
#undef F
#undef F1
#undef F2
#undef D1
#undef D2
/* Sentinel: an all-zero entry marks the end of the table.  */
{0}};
/* Boilerplate for generic calls.  */

/* Return the binary exponent of one unit in the last place of the float X:
   the unbiased exponent minus the 23 fraction bits.  An exponent field of 0
   (zero and subnormals) is treated as 1.  */
static inline int
ulpscale_f (float x)
{
  const int expfield = (asuint (x) >> 23) & 0xff;
  const int effective = expfield ? expfield : 1;
  return effective - 0x7f - 23;
}
/* Return the binary exponent of one unit in the last place of the double X:
   the unbiased exponent minus the 52 fraction bits.  An exponent field of 0
   (zero and subnormals) is treated as 1.  */
static inline int
ulpscale_d (double x)
{
  const int expfield = (asuint64 (x) >> 52) & 0x7ff;
  const int effective = expfield ? expfield : 1;
  return effective - 0x3ff - 52;
}
/* Invoke the function under test described by F on the argument pack A.
   The suffix selects the union member: f/d for float/double and 1/2 for the
   number of arguments.  */
static inline float
call_f1 (const struct fun *f, struct args_f1 a)
{
  float (*impl) (float) = f->fun.f1;
  return impl (a.x);
}

static inline float
call_f2 (const struct fun *f, struct args_f2 a)
{
  float (*impl) (float, float) = f->fun.f2;
  return impl (a.x, a.x2);
}

static inline double
call_d1 (const struct fun *f, struct args_d1 a)
{
  double (*impl) (double) = f->fun.d1;
  return impl (a.x);
}

static inline double
call_d2 (const struct fun *f, struct args_d2 a)
{
  double (*impl) (double, double) = f->fun.d2;
  return impl (a.x, a.x2);
}
/* Invoke the wider-precision counterpart (fun_long) described by F on the
   argument pack A.  Float arguments are widened to double and double
   arguments to long double by the call itself.  */
static inline double
call_long_f1 (const struct fun *f, struct args_f1 a)
{
  double (*ref) (double) = f->fun_long.f1;
  return ref (a.x);
}

static inline double
call_long_f2 (const struct fun *f, struct args_f2 a)
{
  double (*ref) (double, double) = f->fun_long.f2;
  return ref (a.x, a.x2);
}

static inline long double
call_long_d1 (const struct fun *f, struct args_d1 a)
{
  long double (*ref) (long double) = f->fun_long.d1;
  return ref (a.x);
}

static inline long double
call_long_d2 (const struct fun *f, struct args_d2 a)
{
  long double (*ref) (long double, long double) = f->fun_long.d2;
  return ref (a.x, a.x2);
}
static inline void
printcall_f1 (const struct fun *f, struct args_f1 a)
{
printf ("%s(%a)", f->name, a.x);
}
static inline void
printcall_f2 (const struct fun *f, struct args_f2 a)
{
printf ("%s(%a, %a)", f->name, a.x, a.x2);
}
static inline void
printcall_d1 (const struct fun *f, struct args_d1 a)
{
printf ("%s(%a)", f->name, a.x);
}
/* Print "name(x, x2)" for a two-argument double call, with the arguments in
   hex-float form.  No newline is written.  */
static inline void
printcall_d2 (const struct fun *f, struct args_d2 a)
{
  const char *name = f->name;
  printf ("%s(%a, %a)", name, a.x, a.x2);
}
/* Print the tested interval of a one-argument float run.  The generator
   bounds are bit patterns, converted back to float with asfloat.  */
static inline void
printgen_f1 (const struct fun *f, struct gen *gen)
{
  float lo = asfloat (gen->start);
  float hi = asfloat (gen->start + gen->len);

  printf ("%s in [%a;%a]", f->name, lo, hi);
}
/* Print both tested intervals of a two-argument float run.  The generator
   bounds are bit patterns, converted back to float with asfloat.  */
static inline void
printgen_f2 (const struct fun *f, struct gen *gen)
{
  float lo = asfloat (gen->start);
  float hi = asfloat (gen->start + gen->len);
  float lo2 = asfloat (gen->start2);
  float hi2 = asfloat (gen->start2 + gen->len2);

  printf ("%s in [%a;%a] x [%a;%a]", f->name, lo, hi, lo2, hi2);
}
/* Print the tested interval of a one-argument double run.  The generator
   bounds are bit patterns, converted back to double with asdouble.  */
static inline void
printgen_d1 (const struct fun *f, struct gen *gen)
{
  double lo = asdouble (gen->start);
  double hi = asdouble (gen->start + gen->len);

  printf ("%s in [%a;%a]", f->name, lo, hi);
}
/* Print both tested intervals of a two-argument double run.  The generator
   bounds are bit patterns, converted back to double with asdouble.  */
static inline void
printgen_d2 (const struct fun *f, struct gen *gen)
{
  double lo = asdouble (gen->start);
  double hi = asdouble (gen->start + gen->len);
  double lo2 = asdouble (gen->start2);
  double hi2 = asdouble (gen->start2 + gen->len2);

  printf ("%s in [%a;%a] x [%a;%a]", f->name, lo, hi, lo2, hi2);
}
/* reduce_<type> (a, f, op): apply F to every argument stored in A and combine
   the results with the binary operator OP.  The one-argument variants have
   nothing to combine and ignore OP.  F may be empty, in which case the
   arguments themselves are combined.  */
#define reduce_f1(a, f, op) (f (a.x))
#define reduce_f2(a, f, op) (f (a.x) op f (a.x2))
#define reduce_d1(a, f, op) (f (a.x))
#define reduce_d2(a, f, op) (f (a.x) op f (a.x2))
/* Selects which NaN bit test issignaling_f/issignaling_d use.  Defaults to 1
   unless it was already defined before this point.  */
#ifndef IEEE_754_2008_SNAN
# define IEEE_754_2008_SNAN 1
#endif
/* Return nonzero if X is a signaling NaN.

   With IEEE_754_2008_SNAN set, a NaN is signaling when its top mantissa bit
   is clear: flipping that bit and doubling (which drops the sign) gives a
   value above 2 * 0x7fc00000 only for such NaNs.  Otherwise a value with all
   exponent bits and the top mantissa bit set is reported as signaling.  */
static inline int
issignaling_f (float x)
{
  uint32_t bits = asuint (x);

  if (IEEE_754_2008_SNAN)
    return 2 * (bits ^ 0x00400000) > 2u * 0x7fc00000;
  return (bits & 0x7fc00000) == 0x7fc00000;
}
/* Return nonzero if X is a signaling NaN.

   With IEEE_754_2008_SNAN set, a NaN is signaling when its top mantissa bit
   is clear: flipping that bit and doubling (which drops the sign) gives a
   value above 2 * 0x7ff8000000000000 only for such NaNs.  Otherwise a value
   with all exponent bits and the top mantissa bit set is reported as
   signaling.  */
static inline int
issignaling_d (double x)
{
  uint64_t bits = asuint64 (x);

  if (IEEE_754_2008_SNAN)
    return 2 * (bits ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
  return (bits & 0x7ff8000000000000) == 0x7ff8000000000000;
}
#if USE_MPFR
/* Translate a <fenv.h> rounding mode R into the matching MPFR rounding mode.
   Any other value yields -1.  */
static mpfr_rnd_t
rmap (int r)
{
  if (r == FE_TONEAREST)
    return MPFR_RNDN;
  if (r == FE_TOWARDZERO)
    return MPFR_RNDZ;
  if (r == FE_UPWARD)
    return MPFR_RNDU;
  if (r == FE_DOWNWARD)
    return MPFR_RNDD;
  return -1;
}
/* Per-type constants, selected through RT() in ulp.h.
   prec_mpfr_*: precision in bits of the MPFR reference result and of the
   error term computed from it.  */
#define prec_mpfr_f 50
#define prec_mpfr_d 80
/* prec_*: precision in bits of the MPFR values holding the arguments and the
   result rounded to the target type.  */
#define prec_f 24
#define prec_d 53
/* emin_* / emax_*: exponent range handed to mpfr_set_emin/mpfr_set_emax while
   the reference is range-checked, and used to clamp the ulp exponent.  */
#define emin_f -148
#define emin_d -1073
#define emax_f 128
#define emax_d 1024
/* Evaluate the MPFR reference of a one-argument float function into Y with
   rounding mode R.  The argument is first loaded into a prec_f-bit MPFR
   value.  Returns whatever the MPFR function returns.  */
static inline int
call_mpfr_f1 (mpfr_t y, const struct fun *f, struct args_f1 a, mpfr_rnd_t r)
{
  MPFR_DECL_INIT (arg, prec_f);

  mpfr_set_flt (arg, a.x, MPFR_RNDN);
  return f->fun_mpfr.f1 (y, arg, r);
}
/* Evaluate the MPFR reference of a two-argument float function into Y with
   rounding mode R.  Both arguments are first loaded into prec_f-bit MPFR
   values.  Returns whatever the MPFR function returns.  */
static inline int
call_mpfr_f2 (mpfr_t y, const struct fun *f, struct args_f2 a, mpfr_rnd_t r)
{
  MPFR_DECL_INIT (arg, prec_f);
  MPFR_DECL_INIT (arg2, prec_f);

  mpfr_set_flt (arg, a.x, MPFR_RNDN);
  mpfr_set_flt (arg2, a.x2, MPFR_RNDN);
  return f->fun_mpfr.f2 (y, arg, arg2, r);
}
/* Evaluate the MPFR reference of a one-argument double function into Y with
   rounding mode R.  The argument is first loaded into a prec_d-bit MPFR
   value.  Returns whatever the MPFR function returns.  */
static inline int
call_mpfr_d1 (mpfr_t y, const struct fun *f, struct args_d1 a, mpfr_rnd_t r)
{
  MPFR_DECL_INIT (arg, prec_d);

  mpfr_set_d (arg, a.x, MPFR_RNDN);
  return f->fun_mpfr.d1 (y, arg, r);
}
/* Evaluate the MPFR reference of a two-argument double function into Y with
   rounding mode R.  Both arguments are first loaded into prec_d-bit MPFR
   values.  Returns whatever the MPFR function returns.  */
static inline int
call_mpfr_d2 (mpfr_t y, const struct fun *f, struct args_d2 a, mpfr_rnd_t r)
{
  MPFR_DECL_INIT (arg, prec_d);
  MPFR_DECL_INIT (arg2, prec_d);

  mpfr_set_d (arg, a.x, MPFR_RNDN);
  mpfr_set_d (arg2, a.x2, MPFR_RNDN);
  return f->fun_mpfr.d2 (y, arg, arg2, r);
}
#endif
/* Names resolved by RT(x) when instantiating ulp.h for float results:
   float_f is the tested type, double_f the wider type of the reference.  */
#define float_f float
#define double_f double
#define copysign_f copysignf
#define nextafter_f nextafterf
#define fabs_f fabsf
#define asuint_f asuint
#define asfloat_f asfloat
#define scalbn_f scalbnf
#define lscalbn_f scalbn
#define halfinf_f 0x1p127f
#define min_normal_f 0x1p-126f
/* Names resolved by RT(x) when instantiating ulp.h for double results:
   float_d is the tested type, double_d the wider type of the reference.  */
#define float_d double
#define double_d long double
#define copysign_d copysign
#define nextafter_d nextafter
#define fabs_d fabs
#define asuint_d asuint64
#define asfloat_d asdouble
#define scalbn_d scalbn
#define lscalbn_d scalbnl
#define halfinf_d 0x1p1023
#define min_normal_d 0x1p-1022
/* ulp.h is a template: it is included once per function type with T(x)
   appending the type suffix (_f1, _f2, _d1, _d2) and RT(x) appending the
   return-type suffix (_f, _d).  NEW_RT makes the include also emit the
   definitions that depend only on RT; ulp.h undefines it again, so it is
   set once per return type.  */
#define NEW_RT
#define RT(x) x##_f
#define T(x) x##_f1
#include "ulp.h"
#undef T
#define T(x) x##_f2
#include "ulp.h"
#undef T
#undef RT
#define NEW_RT
#define RT(x) x##_d
#define T(x) x##_d1
#include "ulp.h"
#undef T
#define T(x) x##_d2
#include "ulp.h"
#undef T
#undef RT
/* Print the command-line synopsis followed by the name of every entry in the
   fun[] table, then terminate the process with exit status 1.  */
static void
usage (void)
{
  const struct fun *entry = fun;

  puts ("./ulp [-q] [-m] [-f] [-r nudz] [-l soft-ulplimit] [-e ulplimit] func "
	"lo [hi [x lo2 hi2] [count]]");
  puts ("Compares func against a higher precision implementation in [lo; hi].");
  puts ("-q: quiet.");
  puts ("-m: use mpfr even if faster method is available.");
  puts ("-f: disable fenv testing (rounding modes and exceptions).");
  puts ("Supported func:");
  /* The table is terminated by an entry whose name is null.  */
  while (entry->name)
    {
      printf ("\t%s\n", entry->name);
      entry++;
    }
  exit (1);
}
/* Dispatch to the comparison routine generated from ulp.h that matches the
   arity and precision of F, and return its result.  Any other arity prints
   the usage text, which exits.  */
static int
cmp (const struct fun *f, struct gen *gen, const struct conf *conf)
{
  if (f->arity == 1)
    return f->singleprec ? cmp_f1 (f, gen, conf) : cmp_d1 (f, gen, conf);
  if (f->arity == 2)
    return f->singleprec ? cmp_f2 (f, gen, conf) : cmp_d2 (f, gen, conf);
  usage ();
  return 1;
}
/* Parse the command-line number S and return its bit pattern.

   An optional leading '+' or '-' is consumed first; '-' selects the sign bit
   of the target format (bit 31 if SINGLEPREC, bit 63 otherwise).
   - "0x..." without a binary exponent is taken as a raw bit representation
     and the sign bit is flipped into it.
   - Anything else is parsed as a floating-point literal with strtof/strtod
     and the sign bit is or-ed into its representation.  */
static uint64_t
getnum (const char *s, int singleprec)
{
  //	int i;
  uint64_t sign = 0;
  //	char buf[12];

  if (s[0] == '+')
    s++;
  else if (s[0] == '-')
    {
      sign = singleprec ? 1ULL << 31 : 1ULL << 63;
      s++;
    }
  /* 0xXXXX is treated as bit representation, '-' flips the sign bit.
     The exponent marker of a hex float may be either 'p' or 'P'.  tolower
     needs an argument representable as unsigned char, hence the cast.  */
  if (s[0] == '0' && tolower ((unsigned char) s[1]) == 'x'
      && strchr (s, 'p') == 0 && strchr (s, 'P') == 0)
    return sign ^ strtoull (s, 0, 0);
  //	/* SNaN, QNaN, NaN, Inf.  */
  //	for (i=0; s[i] && i < sizeof buf; i++)
  //		buf[i] = tolower(s[i]);
  //	buf[i] = 0;
  //	if (strcmp(buf, "snan") == 0)
  //		return sign | (singleprec ? 0x7fa00000 : 0x7ff4000000000000);
  //	if (strcmp(buf, "qnan") == 0 || strcmp(buf, "nan") == 0)
  //		return sign | (singleprec ? 0x7fc00000 : 0x7ff8000000000000);
  //	if (strcmp(buf, "inf") == 0 || strcmp(buf, "infinity") == 0)
  //		return sign | (singleprec ? 0x7f800000 : 0x7ff0000000000000);
  /* Otherwise assume it's a floating-point literal.  */
  return sign
	 | (singleprec ? asuint (strtof (s, 0)) : asuint64 (strtod (s, 0)));
}
/* Fill the input generator G from the arguments following the function name:

     lo [hi [x lo2 hi2] [count]]

   Bounds are parsed with getnum, so intervals are ranges of bit patterns.
   HI defaults to LO.  When no "x" separator is present the second interval is
   parsed from the same arguments as the first.  Malformed argument lists
   print the usage text, which exits.  */
static void
parsegen (struct gen *g, int argc, char *argv[], const struct fun *f)
{
  int singleprec = f->singleprec;
  int arity = f->arity;
  uint64_t a, b, a2, b2, n;
  if (argc < 1)
    usage ();
  b = a = getnum (argv[0], singleprec);
  n = 0;
  if (argc > 1 && strcmp (argv[1], "x") == 0)
    {
      argc -= 2;
      argv += 2;
    }
  else if (argc > 1)
    {
      b = getnum (argv[1], singleprec);
      if (argc > 2 && strcmp (argv[2], "x") == 0)
	{
	  argc -= 3;
	  argv += 3;
	}
    }
  /* A trailing "x" leaves nothing to parse: argv[0] would be the null
     terminator of the argument vector, so reject it instead of reading it.  */
  if (argc < 1)
    usage ();
  b2 = a2 = getnum (argv[0], singleprec);
  if (argc > 1)
    b2 = getnum (argv[1], singleprec);
  if (argc > 2)
    n = strtoull (argv[2], 0, 0);
  if (argc > 3)
    usage ();
  //printf("ab %lx %lx ab2 %lx %lx n %lu\n", a, b, a2, b2, n);
  if (arity == 1)
    {
      g->start = a;
      g->len = b - a;
      /* Cap the count at the number of representations in the interval;
	 a count of 0 wraps around here and selects every one of them.  */
      if (n - 1 > b - a)
	n = b - a + 1;
      g->off = 0;
      g->step = n ? (g->len + 1) / n : 1;
      g->start2 = g->len2 = 0;
      g->cnt = n;
    }
  else if (arity == 2)
    {
      g->start = a;
      g->len = b - a;
      g->off = g->step = 0;
      g->start2 = a2;
      g->len2 = b2 - a2;
      g->cnt = n;
    }
  else
    usage ();
}
/* Entry point: parse the options, look up the function to test, build the
   input generator from the remaining arguments and run the comparison.
   Returns the result of cmp, or 0 when mpfr is required but not built in.  */
int
main (int argc, char *argv[])
{
const struct fun *f;
struct gen gen;
struct conf conf;
/* Defaults: round to nearest, verbose, no forced mpfr, fenv checks on,
   no soft limit and no error limit.  */
conf.rc = 'n';
conf.quiet = 0;
conf.mpfr = 0;
conf.fenv = 1;
conf.softlim = 0;
conf.errlim = INFINITY;
/* Consume arguments while they start with '-'; the first one that does not
   is the function name.  */
for (;;)
{
argc--;
argv++;
if (argc < 1)
usage ();
if (argv[0][0] != '-')
break;
switch (argv[0][1])
{
case 'e':
/* -e ulplimit: the limit is the next argument.  */
argc--;
argv++;
if (argc < 1)
usage ();
conf.errlim = strtod (argv[0], 0);
break;
case 'f':
conf.fenv = 0;
break;
case 'l':
/* -l soft-ulplimit: the limit is the next argument.  */
argc--;
argv++;
if (argc < 1)
usage ();
conf.softlim = strtod (argv[0], 0);
break;
case 'm':
conf.mpfr = 1;
break;
case 'q':
conf.quiet = 1;
break;
case 'r':
/* The mode letter may be attached (-rn) or be the next argument (-r n).  */
conf.rc = argv[0][2];
if (!conf.rc)
{
argc--;
argv++;
if (argc < 1)
usage ();
conf.rc = argv[0][0];
}
break;
default:
usage ();
}
}
/* Map the rounding mode letter to the <fenv.h> constant.  */
switch (conf.rc)
{
case 'n':
conf.r = FE_TONEAREST;
break;
case 'u':
conf.r = FE_UPWARD;
break;
case 'd':
conf.r = FE_DOWNWARD;
break;
case 'z':
conf.r = FE_TOWARDZERO;
break;
default:
usage ();
}
/* Look the function name up in the null-terminated fun[] table.  */
for (f = fun; f->name; f++)
if (strcmp (argv[0], f->name) == 0)
break;
if (!f->name)
usage ();
if (!f->singleprec && LDBL_MANT_DIG == DBL_MANT_DIG)
conf.mpfr = 1; /* Use mpfr if long double has no extra precision. */
if (!USE_MPFR && conf.mpfr)
{
/* This case reports the problem but still exits with status 0.  */
puts ("mpfr is not available.");
return 0;
}
/* Everything after the function name describes the input intervals.  */
argc--;
argv++;
parsegen (&gen, argc, argv, f);
conf.n = gen.cnt;
return cmp (f, &gen, &conf);
}

View file

@ -0,0 +1,362 @@
/*
* Generic functions for ULP error estimation.
*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* For each different math function type,
T(x) should add a different suffix to x.
RT(x) should add a return type specific suffix to x. */
#ifdef NEW_RT
#undef NEW_RT
# if USE_MPFR
/* Return the ulp exponent of the MPFR value X for the target type: its
   exponent minus RT(prec), clamped to the range the type can represent.
   T is not used.  */
static int RT(ulpscale_mpfr) (mpfr_t x, int t)
{
  /* TODO: pow of 2 cases.  */
  if (mpfr_zero_p (x))
    return RT(emin) - 1;
  if (mpfr_inf_p (x))
    return RT(emax) - RT(prec);
  if (!mpfr_regular_p (x))
    /* NaN.  */
    return 0;

  mpfr_exp_t e = mpfr_get_exp (x) - RT(prec);
  if (e < RT(emin))
    e = RT(emin) - 1;
  if (e > RT(emax) - RT(prec))
    e = RT(emax) - RT(prec);
  return e;
}
# endif
/* Difference between exact result and closest real number that
gets rounded to got, i.e. error before rounding, for a correctly
rounded result the difference is 0.
GOT is the value returned by the tested function, P holds the reference
(y: rounded reference value, tail: remainder of the reference in ulp,
ulpexp: exponent of one ulp) and R is the <fenv.h> rounding mode.
The return value is in units of ulp; INFINITY marks a sign mismatch or a
NaN/non-NaN mismatch.  */
static double RT(ulperr) (RT(float) got, const struct RT(ret) * p, int r)
{
RT(float) want = p->y;
RT(float) d;
double e;
/* Bitwise identical results have no error.  */
if (RT(asuint) (got) == RT(asuint) (want))
return 0.0;
if (signbit (got) != signbit (want))
/* May have false positives with NaN. */
//return isnan(got) && isnan(want) ? 0 : INFINITY;
return INFINITY;
if (!isfinite (want) || !isfinite (got))
{
if (isnan (got) != isnan (want))
return INFINITY;
if (isnan (want))
return 0;
/* An infinity is replaced by half of 2^emax and the other value is halved,
so the difference below stays finite.  */
if (isinf (got))
{
got = RT(copysign) (RT(halfinf), got);
want *= 0.5f;
}
if (isinf (want))
{
want = RT(copysign) (RT(halfinf), want);
got *= 0.5f;
}
}
if (r == FE_TONEAREST)
{
// TODO: incorrect when got vs want cross a powof2 boundary
/* error = got > want
? got - want - tail ulp - 0.5 ulp
: got - want - tail ulp + 0.5 ulp; */
d = got - want;
e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5;
}
else
{
/* In a directed mode, a result on the wrong side of the reference is moved
one step towards it before the difference is taken.  */
if ((r == FE_DOWNWARD && got < want) || (r == FE_UPWARD && got > want)
|| (r == FE_TOWARDZERO && fabs (got) < fabs (want)))
got = RT(nextafter) (got, want);
d = got - want;
e = -p->tail;
}
/* Scale the difference to ulp units and add the correction term.  */
return RT(scalbn) (d, -p->ulpexp) + e;
}
/* Return 1 if the result YGOT is bitwise equal to the reference YWANT and the
   raised exceptions EXGOT differ from EXWANT only in bits listed in EXMAY;
   return 0 otherwise.  */
static int RT(isok) (RT(float) ygot, int exgot, RT(float) ywant, int exwant,
		     int exmay)
{
  if (RT(asuint) (ygot) != RT(asuint) (ywant))
    return 0;
  int unexpected = (exgot ^ exwant) & ~exmay;
  return unexpected == 0;
}
/* Return 1 if YGOT and YWANT have the same bit representation, else 0.
   Exception flags are not considered.  */
static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant)
{
  int same_bits = RT(asuint) (ygot) == RT(asuint) (ywant);
  return same_bits;
}
#endif
/* Call the tested function on A under rounding mode R.  The result is stored
   in *Y and the exceptions raised by the call in *EX.  The rounding mode is
   back at FE_TONEAREST on return.  */
static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
				 RT(float) * y, int *ex)
{
  const int switch_mode = r != FE_TONEAREST;

  if (switch_mode)
    fesetround (r);
  feclearexcept (FE_ALL_EXCEPT);
  *y = T(call) (f, a);
  *ex = fetestexcept (FE_ALL_EXCEPT);
  if (switch_mode)
    fesetround (FE_TONEAREST);
}
/* Call the tested function on A without touching the floating-point
   environment.  The result is stored in *Y and *EX is set to 0.  R is not
   used.  */
static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
				   int r, RT(float) * y, int *ex)
{
  RT(float) result = T(call) (f, a);

  *y = result;
  *ex = 0;
}
/* Compute the reference for A with the higher-precision implementation under
rounding mode R, including the exceptions raised.  Fills P (y, ex, ex_may
and, on mismatch, ulpexp and tail).  Returns 1 if the tested result
YGOT/EXGOT already matches the reference, 0 otherwise.  */
static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
int r, struct RT(ret) * p,
RT(float) ygot, int exgot)
{
if (r != FE_TONEAREST)
fesetround (r);
feclearexcept (FE_ALL_EXCEPT);
/* The volatile copies keep the call and the narrowing conversion between
the feclearexcept and fetestexcept calls.  */
volatile struct T(args) va = a; // TODO: barrier
a = va;
RT(double) yl = T(call_long) (f, a);
p->y = (RT(float)) yl;
volatile RT(float) vy = p->y; // TODO: barrier
(void) vy;
p->ex = fetestexcept (FE_ALL_EXCEPT);
if (r != FE_TONEAREST)
fesetround (FE_TONEAREST);
/* A difference in the inexact flag alone is tolerated.  */
p->ex_may = FE_INEXACT;
if (RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
return 1;
p->ulpexp = RT(ulpscale) (p->y);
/* tail: what the rounded reference lost, in ulp.  For an infinite reference
the distance is measured from 2 * halfinf instead.  */
if (isinf (p->y))
p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp);
else
p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp);
if (RT(fabs) (p->y) < RT(min_normal))
{
/* TODO: subnormal result is treated as underflow even if it's
exact since call_long may not raise inexact correctly. */
if (p->y != 0 || (p->ex & FE_INEXACT))
p->ex |= FE_UNDERFLOW | FE_INEXACT;
}
return 0;
}
/* Compute the reference for A with the higher-precision implementation
   without touching the floating-point environment.  Fills P->y and, on
   mismatch, P->ulpexp and P->tail.  Returns 1 if YGOT is bitwise equal to
   the rounded reference, 0 otherwise.  R and EXGOT are not used.  */
static inline int T(call_long_nofenv) (const struct fun *f, struct T(args) a,
				       int r, struct RT(ret) * p,
				       RT(float) ygot, int exgot)
{
  RT(double) wide = T(call_long) (f, a);

  p->y = (RT(float)) wide;
  if (RT(isok_nofenv) (ygot, p->y))
    return 1;
  p->ulpexp = RT(ulpscale) (p->y);
  /* For an infinite reference the distance is measured from 2 * halfinf.  */
  RT(double) base = isinf (p->y) ? (RT(double)) 2 * RT(halfinf)
				  : (RT(double)) p->y;
  p->tail = RT(lscalbn) (wide - base, -p->ulpexp);
  return 0;
}
/* Return nonzero if at least one argument in A is a NaN and none of the
   arguments is a signaling NaN.  */
static inline int T(qnanpropagation) (struct T(args) a)
{
  if (!T(reduce) (a, isnan, ||))
    return 0;
  return !T(reduce) (a, RT(issignaling), ||);
}
/* Return the sum of the arguments in A (the argument itself for the
   one-argument types).  */
static inline RT(float) T(sum) (struct T(args) a)
{
  RT(float) total = T(reduce) (a, , +);
  return total;
}
/* Compute the reference for A with MPFR under rounding mode R_FENV and fill
P (y, ex, ex_may and, on mismatch, ulpexp and tail).
Returns 1 if the got result is ok.
Aborts when the tool was built without MPFR.  */
static inline int T(call_mpfr_fix) (const struct fun *f, struct T(args) a,
int r_fenv, struct RT(ret) * p,
RT(float) ygot, int exgot)
{
#if USE_MPFR
int t, t2;
mpfr_rnd_t r = rmap (r_fenv);
/* my: high-precision result, mr: result rounded to the target precision,
me: error term.  */
MPFR_DECL_INIT(my, RT(prec_mpfr));
MPFR_DECL_INIT(mr, RT(prec));
MPFR_DECL_INIT(me, RT(prec_mpfr));
mpfr_clear_flags ();
t = T(call_mpfr) (my, f, a, r);
/* Double rounding. */
t2 = mpfr_set (mr, my, r);
if (t2)
t = t2;
/* Narrow the exponent range to that of the target type while the rounded
value is range-checked and subnormalized, then restore the defaults.  */
mpfr_set_emin (RT(emin));
mpfr_set_emax (RT(emax));
t = mpfr_check_range (mr, t, r);
t = mpfr_subnormalize (mr, t, r);
mpfr_set_emax (MPFR_EMAX_DEFAULT);
mpfr_set_emin (MPFR_EMIN_DEFAULT);
p->y = mpfr_get_d (mr, r);
/* Derive the expected exception flags from the MPFR ternary value and
flags.  */
p->ex = t ? FE_INEXACT : 0;
p->ex_may = FE_INEXACT;
if (mpfr_underflow_p () && (p->ex & FE_INEXACT))
/* TODO: handle before and after rounding uflow cases. */
p->ex |= FE_UNDERFLOW;
if (mpfr_overflow_p ())
p->ex |= FE_OVERFLOW | FE_INEXACT;
if (mpfr_divby0_p ())
p->ex |= FE_DIVBYZERO;
//if (mpfr_erangeflag_p ())
// p->ex |= FE_INVALID;
if (!mpfr_nanflag_p () && RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
return 1;
/* A NaN result is only expected to raise invalid when it is not the
propagation of a quiet NaN input.  */
if (mpfr_nanflag_p () && !T(qnanpropagation) (a))
p->ex |= FE_INVALID;
p->ulpexp = RT(ulpscale_mpfr) (my, t);
if (!isfinite (p->y))
{
p->tail = 0;
if (isnan (p->y))
{
/* If an input was nan keep its sign. */
p->y = T(sum) (a);
if (!isnan (p->y))
p->y = (p->y - p->y) / (p->y - p->y);
return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
}
/* NOTE(review): the exponent 1024 is hard-coded here also for the float
instantiation — confirm this is intended.  */
mpfr_set_si_2exp (mr, signbit (p->y) ? -1 : 1, 1024, MPFR_RNDN);
if (mpfr_cmpabs (my, mr) >= 0)
return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
}
/* tail = (my - mr) scaled to ulp units.  */
mpfr_sub (me, my, mr, MPFR_RNDN);
mpfr_mul_2si (me, me, -p->ulpexp, MPFR_RNDN);
p->tail = mpfr_get_d (me, MPFR_RNDN);
return 0;
#else
abort ();
#endif
}
/* Test F on the inputs produced by GEN according to CONF: call it on each
input, compare the result (and, with fenv testing, the exceptions) with the
reference, report mismatches and print a PASS/FAIL summary line.
Returns 1 if any input failed, 0 otherwise.  */
static int T(cmp) (const struct fun *f, struct gen *gen,
const struct conf *conf)
{
/* maxerr: largest absolute ulp error seen; cnt: inputs tested;
cnt1/cnt2: inputs with error above 0 / above 1 ulp; cntfail: failures.  */
double maxerr = 0;
uint64_t cnt = 0;
uint64_t cnt1 = 0;
uint64_t cnt2 = 0;
uint64_t cntfail = 0;
int r = conf->r;
int use_mpfr = conf->mpfr;
int fenv = conf->fenv;
for (;;)
{
struct RT(ret) want;
struct T(args) a = T(next) (gen);
int exgot;
int exgot2;
RT(float) ygot;
RT(float) ygot2;
int fail = 0;
if (fenv)
T(call_fenv) (f, a, r, &ygot, &exgot);
else
T(call_nofenv) (f, a, r, &ygot, &exgot);
/* For functions marked twice, repeat the call with secondcall set and
require a bitwise identical result.  */
if (f->twice) {
secondcall = 1;
if (fenv)
T(call_fenv) (f, a, r, &ygot2, &exgot2);
else
T(call_nofenv) (f, a, r, &ygot2, &exgot2);
secondcall = 0;
if (RT(asuint) (ygot) != RT(asuint) (ygot2))
{
fail = 1;
cntfail++;
T(printcall) (f, a);
printf (" got %a then %a for same input\n", ygot, ygot2);
}
}
cnt++;
/* Compute the reference; ok is nonzero when the result already matches.  */
int ok = use_mpfr
? T(call_mpfr_fix) (f, a, r, &want, ygot, exgot)
: (fenv ? T(call_long_fenv) (f, a, r, &want, ygot, exgot)
: T(call_long_nofenv) (f, a, r, &want, ygot, exgot));
if (!ok)
{
int print = 0;
double err = RT(ulperr) (ygot, &want, r);
double abserr = fabs (err);
// TODO: count errors below accuracy limit.
if (abserr > 0)
cnt1++;
if (abserr > 1)
cnt2++;
/* An error above the hard limit is a failure; each input is counted as
failed at most once.  */
if (abserr > conf->errlim)
{
print = 1;
if (!fail)
{
fail = 1;
cntfail++;
}
}
/* A new maximum is printed unless quiet or not above the soft limit.  */
if (abserr > maxerr)
{
maxerr = abserr;
if (!conf->quiet && abserr > conf->softlim)
print = 1;
}
if (print)
{
T(printcall) (f, a);
// TODO: inf ulp handling
printf (" got %a want %a %+g ulp err %g\n", ygot, want.y,
want.tail, err);
}
/* Exception check: flags that differ and are not listed in ex_may are a
failure.  */
int diff = fenv ? exgot ^ want.ex : 0;
if (fenv && (diff & ~want.ex_may))
{
if (!fail)
{
fail = 1;
cntfail++;
}
T(printcall) (f, a);
printf (" is %a %+g ulp, got except 0x%0x", want.y, want.tail,
exgot);
if (diff & exgot)
printf (" wrongly set: 0x%x", diff & exgot);
if (diff & ~exgot)
printf (" wrongly clear: 0x%x", diff & ~exgot);
putchar ('\n');
}
}
if (cnt >= conf->n)
break;
/* Progress report every 0x100000 inputs.  */
if (!conf->quiet && cnt % 0x100000 == 0)
printf ("progress: %6.3f%% cnt %llu cnt1 %llu cnt2 %llu cntfail %llu "
"maxerr %g\n",
100.0 * cnt / conf->n, (unsigned long long) cnt,
(unsigned long long) cnt1, (unsigned long long) cnt2,
(unsigned long long) cntfail, maxerr);
}
/* Summary line; math/tools/plot.py looks for the PASS/FAIL prefix.  */
double cc = cnt;
if (cntfail)
printf ("FAIL ");
else
printf ("PASS ");
T(printgen) (f, gen);
printf (" round %c errlim %g maxerr %g %s cnt %llu cnt1 %llu %g%% cnt2 %llu "
"%g%% cntfail %llu %g%%\n",
conf->rc, conf->errlim,
maxerr, conf->r == FE_TONEAREST ? "+0.5" : "+1.0",
(unsigned long long) cnt,
(unsigned long long) cnt1, 100.0 * cnt1 / cc,
(unsigned long long) cnt2, 100.0 * cnt2 / cc,
(unsigned long long) cntfail, 100.0 * cntfail / cc);
return !!cntfail;
}

View file

@ -0,0 +1,31 @@
// polynomial for approximating cos(x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 8; // polynomial degree
a = -pi/4; // interval
b = pi/4;
// find even polynomial with minimal abs error compared to cos(x)
f = cos(x);
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
approx = proc(poly,d) {
return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
// (each step refits the remaining terms and keeps only the lowest new
// coefficient, rounded to double; only even powers of x are added)
poly = 1;
for i from 1 to deg/2 do {
p = roundcoefficients(approx(poly,2*i), [|D ...|]);
poly = poly + x^(2*i)*coeff(p,0);
};
// report the errors and the coefficients of x^0 .. x^deg in hexadecimal
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,35 @@
// polynomial for approximating e^x
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 5; // poly degree
N = 128; // table entries
b = log(2)/(2*N); // interval
b = b + b*0x1p-16; // increase interval for non-nearest rounding (TOINT_NARROW)
a = -b;
// find polynomial with minimal abs error
// return p that minimizes |exp(x) - poly(x) - x^d*p(x)|
approx = proc(poly,d) {
return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first 2 coeffs are fixed, iteratively find optimal double prec coeffs
// (each step refits the remaining terms and keeps only the lowest new
// coefficient, rounded to double)
poly = 1 + x;
for i from 2 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the errors and the coefficients of x^0 .. x^deg in hexadecimal
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
print("in [",a,b,"]");
// double interval error for non-nearest rounding
print("rel2 error:", accurateinfnorm(1-poly(x)/exp(x), [2*a;2*b], 30));
print("abs2 error:", accurateinfnorm(exp(x)-poly(x), [2*a;2*b], 30));
print("in [",2*a,2*b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,48 @@
// polynomial for approximating 2^x
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
// exp2f parameters
deg = 3; // poly degree
N = 32; // table entries
b = 1/(2*N); // interval
a = -b;
//// exp2 parameters (uncomment these and comment out the exp2f ones above)
//deg = 5; // poly degree
//N = 128; // table entries
//b = 1/(2*N); // interval
//a = -b;
// find polynomial with minimal relative error
f = 2^x;
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
approx = proc(poly,d) {
return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
// (absolute-error alternative; only used by the commented-out line below)
approx_abs = proc(poly,d) {
return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
poly = 1;
for i from 1 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
// p = roundcoefficients(approx_abs(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the errors and the coefficients of x^0 .. x^deg in hexadecimal
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30));
print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30));
print("in [",a,b,"]");
// double interval error for non-nearest rounding:
print("rel2 error:", accurateinfnorm(1-poly(x)/2^x, [2*a;2*b], 30));
print("abs2 error:", accurateinfnorm(2^x-poly(x), [2*a;2*b], 30));
print("in [",2*a,2*b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,35 @@
// polynomial for approximating log(1+x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 12; // poly degree
// |log(1+x)| > 0x1p-4 outside the interval
a = -0x1p-4;
b = 0x1.09p-4;
// find log(1+x)/x polynomial with minimal relative error
// (minimal relative error polynomial for log(1+x) is the same * x)
deg = deg-1; // because of /x
// f = log(1+x)/x; using taylor series
// (truncated after the x^60 term)
f = 0;
for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
approx = proc(poly,d) {
return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
poly = 1;
for i from 1 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the error and the coefficients of the log(1+x)/x polynomial
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,42 @@
// polynomial for approximating log2(1+x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 11; // poly degree
// |log2(1+x)| > 0x1p-4 outside the interval
a = -0x1.5b51p-5;
b = 0x1.6ab2p-5;
// 1/ln2 split into a high part with the low bits rounded away and a low part
ln2 = evaluate(log(2),0);
invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits
invln2lo = double(1/ln2 - invln2hi);
// find log2(1+x)/x polynomial with minimal relative error
// (minimal relative error polynomial for log2(1+x) is the same * x)
deg = deg-1; // because of /x
// f = log2(1+x)/x; log(1+x)/x from its taylor series, then divided by ln2
f = 0;
for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
f = f/ln2;
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
approx = proc(poly,d) {
return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
poly = invln2hi + invln2lo;
for i from 1 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the split constant, the error and the coefficients
display = hexadecimal;
print("invln2hi:", invln2hi);
print("invln2lo:", invln2lo);
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,41 @@
// polynomial for approximating log2(1+x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 7; // poly degree
// interval ~= 1/(2*N), where N is the table entries
a= -0x1.f45p-8;
b= 0x1.f45p-8;
// 1/ln2 split into a high part with the low bits rounded away and a low part
ln2 = evaluate(log(2),0);
invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits
invln2lo = double(1/ln2 - invln2hi);
// find log2(1+x) polynomial with minimal absolute error
f = log(1+x)/ln2;
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
approx = proc(poly,d) {
return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
poly = x*(invln2lo + invln2hi);
for i from 2 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the split constant, the error and the coefficients
display = hexadecimal;
print("invln2hi:", invln2hi);
print("invln2lo:", invln2lo);
print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
//// relative error computation fails if f(0)==0
//// g = f(x)/x = log2(1+x)/x; using taylor series
//g = 0;
//for i from 0 to 60 do { g = g + (-x)^i/(i+1)/ln2; };
//print("rel error:", accurateinfnorm(1-(poly(x)/x)/g(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,35 @@
// polynomial for approximating log(1+x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 6; // poly degree
// interval ~= 1/(2*N), where N is the table entries
a = -0x1.fp-9;
b = 0x1.fp-9;
// find log(1+x) polynomial with minimal absolute error
f = log(1+x);
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
approx = proc(poly,d) {
return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
poly = x;
for i from 2 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the errors and the coefficients of x^0 .. x^deg in hexadecimal
display = hexadecimal;
print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
// relative error computation fails if f(0)==0
// g = f(x)/x = log(1+x)/x; using taylor series
g = 0;
for i from 0 to 60 do { g = g + (-x)^i/(i+1); };
print("rel error:", accurateinfnorm(1-poly(x)/x/g(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,61 @@
#!/usr/bin/python
# ULP error plot tool.
#
# Copyright (c) 2019, Arm Limited.
# SPDX-License-Identifier: MIT
import numpy as np
import matplotlib.pyplot as plt
import sys
import re
# example usage:
# build/bin/ulp -e .0001 log 0.5 2.0 2345678 | math/tools/plot.py
def fhex(s):
    """Convert a hexadecimal floating-point string (e.g. '0x1.8p+1') to float."""
    value = float.fromhex(s)
    return value
def parse(f):
    """Parse the output of the ulp tool read from the iterable of lines f.

    Lines reporting an error for a one-argument call are collected; the
    PASS/FAIL summary line is echoed to stdout; everything else is ignored.

    Returns four parallel lists (xs, gs, ys, es): the input, the value the
    tested function returned, the wanted value and the error in ulp.
    """
    xs = []
    gs = []
    ys = []
    es = []
    # Has to match the format used in ulp.c
    r = re.compile(r'[^ (]+\(([^ )]*)\) got ([^ ]+) want ([^ ]+) [^ ]+ ulp err ([^ ]+)')
    for line in f:
        m = r.match(line)
        if m:
            # Input, got and want are printed as hex floats, the error as %g.
            xs.append(float.fromhex(m.group(1)))
            gs.append(float.fromhex(m.group(2)))
            ys.append(float.fromhex(m.group(3)))
            es.append(float(m.group(4)))
        elif line.startswith('PASS') or line.startswith('FAIL'):
            # Print the summary line; it still carries its own newline, so
            # strip it to avoid emitting an extra blank line.
            print(line.rstrip('\n'))
    return xs, gs, ys, es
def plot(xs, gs, ys, es):
    """Show two stacked plots over the inputs xs.

    The upper plot shows the absolute ulp errors es and is annotated with the
    largest one; the lower plot shows the wanted values ys and the obtained
    values gs.  With fewer than two samples a message is printed instead.
    """
    if len(xs) < 2:
        print('not enough samples')
        return
    lo = min(xs)
    hi = max(xs)
    fig, (err_ax, val_ax) = plt.subplots(nrows=2)
    # ignore the sign
    es = np.abs(es)
    emax = max(es)
    label = '%s\n%g' % (emax.hex(), emax)
    err_ax.text(lo + (hi - lo) * 0.7, emax * 0.8, label)
    err_ax.plot(xs, es, 'r.')
    err_ax.grid()
    val_ax.plot(xs, ys, 'r.', label='want')
    val_ax.plot(xs, gs, 'b.', label='got')
    val_ax.grid()
    val_ax.legend()
    plt.show()
# Script entry: read the ulp tool output from stdin and plot it.
xs, gs, ys, es = parse(sys.stdin)
plot(xs, gs, ys, es)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,37 @@
// polynomial for approximating sin(x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 7; // polynomial degree
a = -pi/4; // interval
b = pi/4;
// find even polynomial with minimal abs error compared to sin(x)/x
// account for /x
deg = deg-1;
// f = sin(x)/x;
// (built from its taylor series; c accumulates the factorial (2*i+1)!)
f = 1;
c = 1;
for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; };
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
approx = proc(poly,d) {
return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
// (only even powers of x are added)
poly = 1;
for i from 1 to deg/2 do {
p = roundcoefficients(approx(poly,2*i), [|D ...|]);
poly = poly + x^(2*i)*coeff(p,0);
};
// report the errors and the coefficients of the sin(x)/x polynomial
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,30 @@
// polynomial for approximating e^x
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 4; // poly degree
N = 128; // table entries
b = log(2)/(2*N); // interval
a = -b;
// find polynomial with minimal abs error
// return p that minimizes |exp(x) - poly(x) - x^d*p(x)|
approx = proc(poly,d) {
return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first 2 coeffs are fixed, iteratively find optimal double prec coeffs
poly = 1 + x;
for i from 2 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the errors and the coefficients of x^0 .. x^deg in hexadecimal
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,34 @@
// polynomial used for __v_log(x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 6; // poly degree
a = -0x1.fc1p-9; // interval
b = 0x1.009p-8;
// find log(1+x)/x polynomial with minimal relative error
// (minimal relative error polynomial for log(1+x) is the same * x)
deg = deg-1; // because of /x
// f = log(1+x)/x; using taylor series
// (truncated after the x^60 term)
f = 0;
for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
approx = proc(poly,d) {
return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
poly = 1;
for i from 1 to deg do {
p = roundcoefficients(approx(poly,i), [|D ...|]);
poly = poly + x^i*coeff(p,0);
};
// report the error and the coefficients of the log(1+x)/x polynomial
display = hexadecimal;
print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,36 @@
// polynomial for approximating sin(x)
//
// Copyright (c) 2019, Arm Limited.
// SPDX-License-Identifier: MIT
deg = 15; // polynomial degree
a = -pi/2; // interval
b = pi/2;
// find even polynomial with minimal abs error compared to sin(x)/x
// account for /x
deg = deg-1;
// f = sin(x)/x;
// (built from its taylor series; c accumulates the factorial (2*i+1)!)
f = 1;
c = 1;
for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; };
// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
approx = proc(poly,d) {
return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
};
// first coeff is fixed, iteratively find optimal double prec coeffs
// (only even powers of x are added)
poly = 1;
for i from 1 to deg/2 do {
p = roundcoefficients(approx(poly,2*i), [|D ...|]);
poly = poly + x^(2*i)*coeff(p,0);
};
// report the error and the coefficients of the sin(x)/x polynomial
display = hexadecimal;
print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
print("in [",a,b,"]");
print("coeffs:");
for i from 0 to deg do coeff(poly,i);

View file

@ -0,0 +1,87 @@
/*
* Double-precision vector cos function.
*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "mathlib.h"
#include "v_math.h"
#if V_SUPPORTED
/* Coefficients of the sin(r) polynomial, highest order first.  V_NAME(cos)
evaluates r + r^3 * (C1 + C2 r^2 + ... + C7 r^12).  */
static const double Poly[] = {
/* worst-case error is 3.5 ulp.
abs error: 0x1.be222a58p-53 in [-pi/2, pi/2]. */
-0x1.9f4a9c8b21dc9p-41,
0x1.60e88a10163f2p-33,
-0x1.ae6361b7254e7p-26,
0x1.71de382e8d62bp-19,
-0x1.a01a019aeb4ffp-13,
0x1.111111110b25ep-7,
-0x1.55555555554c3p-3,
};
/* Cn is the coefficient of r^(2n+1); the table is indexed in reverse.  */
#define C7 v_f64 (Poly[0])
#define C6 v_f64 (Poly[1])
#define C5 v_f64 (Poly[2])
#define C4 v_f64 (Poly[3])
#define C3 v_f64 (Poly[4])
#define C2 v_f64 (Poly[5])
#define C1 v_f64 (Poly[6])
/* Pi1, Pi2 and Pi3 are subtracted in turn (times n) during range reduction;
presumably a three-part split of pi — verify against the generator.  */
#define InvPi v_f64 (0x1.45f306dc9c883p-2)
#define HalfPi v_f64 (0x1.921fb54442d18p+0)
#define Pi1 v_f64 (0x1.921fb54442d18p+1)
#define Pi2 v_f64 (0x1.1a62633145c06p-53)
#define Pi3 v_f64 (0x1.c1cd129024e09p-106)
/* Shift is added and subtracted again to round to an integer; RangeVal is
the |x| threshold from which lanes are handed to specialcase.  */
#define Shift v_f64 (0x1.8p52)
#define RangeVal v_f64 (0x1p23)
#define AbsMask v_u64 (0x7fffffffffffffff)
/* Slow path for lanes selected by CMP.  Delegates to v_call_f64 with the
scalar cos; presumably that recomputes the selected lanes from X and keeps Y
elsewhere — see v_math.h.  Kept out of line with noinline.  */
VPCS_ATTR
__attribute__ ((noinline)) static v_f64_t
specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
{
return v_call_f64 (cos, x, y, cmp);
}
/* Vector cos: reduce |x| to r = |x| - n*pi with n a half-integer, evaluate
the sin(r) polynomial and fix the sign from the parity of the rounded
quotient.  Lanes with |x| >= RangeVal go through specialcase.  */
VPCS_ATTR
v_f64_t
V_NAME(cos) (v_f64_t x)
{
v_f64_t n, r, r2, y;
v_u64_t odd, cmp;
/* r = |x|; the comparison is done on the bit patterns.  */
r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask);
cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal));
/* n = rint((|x|+pi/2)/pi) - 0.5. */
n = v_fma_f64 (InvPi, r + HalfPi, Shift);
/* Bit 0 of the shifted value is moved to the sign bit position; it is
applied to the result below.  */
odd = v_as_u64_f64 (n) << 63;
n -= Shift;
n -= v_f64 (0.5);
/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
r = v_fma_f64 (-Pi1, n, r);
r = v_fma_f64 (-Pi2, n, r);
r = v_fma_f64 (-Pi3, n, r);
/* sin(r) poly approx. */
r2 = r * r;
y = v_fma_f64 (C7, r2, C6);
y = v_fma_f64 (y, r2, C5);
y = v_fma_f64 (y, r2, C4);
y = v_fma_f64 (y, r2, C3);
y = v_fma_f64 (y, r2, C2);
y = v_fma_f64 (y, r2, C1);
y = v_fma_f64 (y * r2, r, r);
/* sign. */
y = v_as_f64_u64 (v_as_u64_f64 (y) ^ odd);
if (unlikely (v_any_u64 (cmp)))
return specialcase (x, y, cmp);
return y;
}
VPCS_ALIAS
#endif

View file

@ -0,0 +1,76 @@
/*
* Single-precision vector cos function.
*
* Copyright (c) 2019, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "mathlib.h"
#include "v_math.h"
#if V_SUPPORTED
/* Coefficients of the sin(r) polynomial, highest order first.  V_NAME(cosf)
evaluates r + r^3 * (A3 + A5 r^2 + A7 r^4 + A9 r^6).  */
static const float Poly[] = {
/* 1.886 ulp error */
0x1.5b2e76p-19f,
-0x1.9f42eap-13f,
0x1.110df4p-7f,
-0x1.555548p-3f,
};
/* Pi1, Pi2 and Pi3 are subtracted in turn (times n) during range reduction;
presumably a three-part split of pi — verify against the generator.  */
#define Pi1 v_f32 (0x1.921fb6p+1f)
#define Pi2 v_f32 (-0x1.777a5cp-24f)
#define Pi3 v_f32 (-0x1.ee59dap-49f)
/* Ak is the coefficient of r^k; the table is indexed in reverse.  */
#define A3 v_f32 (Poly[3])
#define A5 v_f32 (Poly[2])
#define A7 v_f32 (Poly[1])
#define A9 v_f32 (Poly[0])
/* RangeVal is the |x| threshold from which lanes are handed to specialcase;
Shift is added and subtracted again to round to an integer.  */
#define RangeVal v_f32 (0x1p20f)
#define InvPi v_f32 (0x1.45f306p-2f)
#define Shift v_f32 (0x1.8p+23f)
#define AbsMask v_u32 (0x7fffffff)
#define HalfPi v_f32 (0x1.921fb6p0f)
/* Slow path for lanes selected by CMP.  Delegates to v_call_f32 with the
scalar cosf; presumably that recomputes the selected lanes from X and keeps
Y elsewhere — see v_math.h.  */
VPCS_ATTR
static v_f32_t
specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
{
/* Fall back to scalar code. */
return v_call_f32 (cosf, x, y, cmp);
}
/* Vector cosf: reduce |x| to r = |x| - n*pi with n a half-integer, evaluate
the sin(r) polynomial and fix the sign from the parity of the rounded
quotient.  Lanes with |x| >= RangeVal go through specialcase.  */
VPCS_ATTR
v_f32_t
V_NAME(cosf) (v_f32_t x)
{
v_f32_t n, r, r2, y;
v_u32_t odd, cmp;
/* r = |x|; the comparison is done on the bit patterns.  */
r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
/* n = rint((|x|+pi/2)/pi) - 0.5 */
n = v_fma_f32 (InvPi, r + HalfPi, Shift);
/* Bit 0 of the shifted value is moved to the sign bit position; it is
applied to the result below.  */
odd = v_as_u32_f32 (n) << 31;
n -= Shift;
n -= v_f32 (0.5f);
/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */
r = v_fma_f32 (-Pi1, n, r);
r = v_fma_f32 (-Pi2, n, r);
r = v_fma_f32 (-Pi3, n, r);
/* y = sin(r) */
r2 = r * r;
y = v_fma_f32 (A9, r2, A7);
y = v_fma_f32 (y, r2, A5);
y = v_fma_f32 (y, r2, A3);
y = v_fma_f32 (y * r2, r, r);
/* sign fix */
y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd);
if (unlikely (v_any_u32 (cmp)))
return specialcase (x, y, cmp);
return y;
}
VPCS_ALIAS
#endif

Some files were not shown because too many files have changed in this diff Show more