strconv: implement Ryū-like algorithm for fixed precision ftoa

This patch implements a simplified version of Ulf Adams,
"Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
for formatting floating-point numbers with a fixed number of decimal
digits.

It uses the same principles but does not need to handle
the complex task of finding a shortest representation.
This allows to handle a few more cases than Grisu3, notably
formatting with up to 18 significant digits.

name                         old time/op  new time/op  delta
AppendFloat/32Fixed8Hard-4   72.0ns ± 2%  56.0ns ± 2%  -22.28%  (p=0.000 n=10+10)
AppendFloat/32Fixed9Hard-4   74.8ns ± 0%  64.2ns ± 2%  -14.16%  (p=0.000 n=8+10)
AppendFloat/64Fixed1-4       60.4ns ± 1%  54.2ns ± 1%  -10.31%  (p=0.000 n=10+9)
AppendFloat/64Fixed2-4       66.3ns ± 1%  53.3ns ± 1%  -19.54%  (p=0.000 n=10+9)
AppendFloat/64Fixed3-4       61.0ns ± 1%  55.0ns ± 2%   -9.80%  (p=0.000 n=9+10)
AppendFloat/64Fixed4-4       66.9ns ± 0%  52.0ns ± 2%  -22.20%  (p=0.000 n=8+10)
AppendFloat/64Fixed12-4      95.5ns ± 1%  76.2ns ± 3%  -20.19%  (p=0.000 n=10+9)
AppendFloat/64Fixed16-4      1.62µs ± 0%  0.07µs ± 2%  -95.69%  (p=0.000 n=10+10)
AppendFloat/64Fixed12Hard-4  1.27µs ± 1%  0.07µs ± 1%  -94.83%  (p=0.000 n=9+9)
AppendFloat/64Fixed17Hard-4  3.68µs ± 1%  0.08µs ± 2%  -97.86%  (p=0.000 n=10+9)
AppendFloat/64Fixed18Hard-4  3.67µs ± 0%  3.72µs ± 1%   +1.44%  (p=0.000 n=9+10)

Updates #15672

Change-Id: I160963e141dd48287ad8cf57bcc3c686277788e8
Reviewed-on: https://go-review.googlesource.com/c/go/+/170079
Reviewed-by: Emmanuel Odeke <emmanuel@orijtech.com>
Trust: Emmanuel Odeke <emmanuel@orijtech.com>
Trust: Nigel Tao <nigeltao@golang.org>
Trust: Robert Griesemer <gri@golang.org>
Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com>
TryBot-Result: Go Bot <gobot@golang.org>
This commit is contained in:
Rémy Oudompheng 2019-03-24 23:21:38 +01:00 committed by Emmanuel Odeke
parent 8f4c5068e0
commit 0184b445c0
5 changed files with 374 additions and 5 deletions

View file

@ -143,12 +143,15 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
}
digits = prec
}
if digits <= 15 {
// try fast algorithm when the number of digits is reasonable.
var buf [24]byte
var buf [24]byte
if bitSize == 32 && digits <= 9 {
digs.d = buf[:]
f := extFloat{mant, exp - int(flt.mantbits), neg}
ok = f.FixedDecimal(&digs, digits)
ryuFtoaFixed32(&digs, uint32(mant), exp-int(flt.mantbits), digits)
ok = true
} else if digits <= 18 {
digs.d = buf[:]
ryuFtoaFixed64(&digs, mant, exp-int(flt.mantbits), digits)
ok = true
}
}
if !ok {

View file

@ -77,6 +77,14 @@ var ftoatests = []ftoaTest{
{1.2345e6, 'f', 5, "1234500.00000"},
{1.2345e6, 'g', 5, "1.2345e+06"},
// Round to even
{1.2345e6, 'e', 3, "1.234e+06"},
{1.2355e6, 'e', 3, "1.236e+06"},
{1.2345, 'f', 3, "1.234"},
{1.2355, 'f', 3, "1.236"},
{1234567890123456.5, 'e', 15, "1.234567890123456e+15"},
{1234567890123457.5, 'e', 15, "1.234567890123458e+15"},
{1e23, 'e', 17, "9.99999999999999916e+22"},
{1e23, 'f', 17, "99999999999999991611392.00000000000000000"},
{1e23, 'g', 17, "9.9999999999999992e+22"},
@ -241,11 +249,19 @@ var ftoaBenches = []struct {
{"32Point", 339.7784, 'g', -1, 32},
{"32Exp", -5.09e25, 'g', -1, 32},
{"32NegExp", -5.11e-25, 'g', -1, 32},
{"32Fixed8Hard", math.Ldexp(15961084, -125), 'e', 8, 32},
{"32Fixed9Hard", math.Ldexp(14855922, -83), 'e', 9, 32},
{"64Fixed1", 123456, 'e', 3, 64},
{"64Fixed2", 123.456, 'e', 3, 64},
{"64Fixed3", 1.23456e+78, 'e', 3, 64},
{"64Fixed4", 1.23456e-78, 'e', 3, 64},
{"64Fixed12", 1.23456e-78, 'e', 12, 64},
{"64Fixed16", 1.23456e-78, 'e', 16, 64},
// From testdata/testfp.txt
{"64Fixed12Hard", math.Ldexp(6965949469487146, -249), 'e', 12, 64},
{"64Fixed17Hard", math.Ldexp(8887055249355788, 665), 'e', 17, 64},
{"64Fixed18Hard", math.Ldexp(6994187472632449, 690), 'e', 18, 64},
// Trigger slow path (see issue #15672).
{"Slowpath64", 622666234635.3213e-320, 'e', -1, 64},

311
src/strconv/ftoaryu.go Normal file
View file

@ -0,0 +1,311 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strconv
import (
"math/bits"
)
// binary to decimal conversion using the Ryū algorithm.
//
// See Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
//
// Fixed precision formatting is a variant of the original paper's
// algorithm, where a single multiplication by 10^k is required,
// sharing the same rounding guarantees.
// ryuFtoaFixed32 formats mant*(2^exp) with prec decimal digits.
func ryuFtoaFixed32(d *decimalSlice, mant uint32, exp int, prec int) {
if prec < 0 {
panic("ryuFtoaFixed32 called with negative prec")
}
if prec > 9 {
panic("ryuFtoaFixed32 called with prec > 9")
}
// Zero input.
if mant == 0 {
d.nd, d.dp = 0, 0
return
}
// Renormalize to a 25-bit mantissa.
e2 := exp
if b := bits.Len32(mant); b < 25 {
mant <<= uint(25 - b)
e2 += int(b) - 25
}
// Choose an exponent such that rounded mant*(2^e2)*(10^q) has
// at least prec decimal digits, i.e
// mant*(2^e2)*(10^q) >= 10^(prec-1)
// Because mant >= 2^24, it is enough to choose:
// 2^(e2+24) >= 10^(-q+prec-1)
// or q = -mulByLog2Log10(e2+24) + prec - 1
q := -mulByLog2Log10(e2+24) + prec - 1
// Now compute mant*(2^e2)*(10^q).
// Is it an exact computation?
// Only small positive powers of 10 are exact (5^28 has 66 bits).
exact := q <= 27 && q >= 0
di, dexp2, d0 := mult64bitPow10(mant, e2, q)
if dexp2 >= 0 {
panic("not enough significant bits after mult64bitPow10")
}
// As a special case, computation might still be exact, if exponent
// was negative and if it amounts to computing an exact division.
// In that case, we ignore all lower bits.
// Note that division by 10^11 cannot be exact as 5^11 has 26 bits.
if q < 0 && q >= -10 && divisibleByPower5(uint64(mant), -q) {
exact = true
d0 = true
}
// Remove extra lower bits and keep rounding info.
extra := uint(-dexp2)
extraMask := uint32(1<<extra - 1)
di, dfrac := di>>extra, di&extraMask
roundUp := false
if exact {
// If we computed an exact product, d + 1/2
// should round to d+1 if 'd' is odd.
roundUp = dfrac > 1<<(extra-1) ||
(dfrac == 1<<(extra-1) && !d0) ||
(dfrac == 1<<(extra-1) && d0 && di&1 == 1)
} else {
// otherwise, d+1/2 always rounds up because
// we truncated below.
roundUp = dfrac>>(extra-1) == 1
}
if dfrac != 0 {
d0 = false
}
// Proceed to the requested number of digits
formatDecimal(d, uint64(di), !d0, roundUp, prec)
// Adjust exponent
d.dp -= q
}
// ryuFtoaFixed64 formats mant*(2^exp) with prec decimal digits.
func ryuFtoaFixed64(d *decimalSlice, mant uint64, exp int, prec int) {
if prec > 18 {
panic("ryuFtoaFixed64 called with prec > 18")
}
// Zero input.
if mant == 0 {
d.nd, d.dp = 0, 0
return
}
// Renormalize to a 55-bit mantissa.
e2 := exp
if b := bits.Len64(mant); b < 55 {
mant = mant << uint(55-b)
e2 += int(b) - 55
}
// Choose an exponent such that rounded mant*(2^e2)*(10^q) has
// at least prec decimal digits, i.e
// mant*(2^e2)*(10^q) >= 10^(prec-1)
// Because mant >= 2^54, it is enough to choose:
// 2^(e2+54) >= 10^(-q+prec-1)
// or q = -mulByLog2Log10(e2+54) + prec - 1
//
// The minimal required exponent is -mulByLog2Log10(1025)+18 = -291
// The maximal required exponent is mulByLog2Log10(1074)+18 = 342
q := -mulByLog2Log10(e2+54) + prec - 1
// Now compute mant*(2^e2)*(10^q).
// Is it an exact computation?
// Only small positive powers of 10 are exact (5^55 has 128 bits).
exact := q <= 55 && q >= 0
di, dexp2, d0 := mult128bitPow10(mant, e2, q)
if dexp2 >= 0 {
panic("not enough significant bits after mult128bitPow10")
}
// As a special case, computation might still be exact, if exponent
// was negative and if it amounts to computing an exact division.
// In that case, we ignore all lower bits.
// Note that division by 10^23 cannot be exact as 5^23 has 54 bits.
if q < 0 && q >= -22 && divisibleByPower5(mant, -q) {
exact = true
d0 = true
}
// Remove extra lower bits and keep rounding info.
extra := uint(-dexp2)
extraMask := uint64(1<<extra - 1)
di, dfrac := di>>extra, di&extraMask
roundUp := false
if exact {
// If we computed an exact product, d + 1/2
// should round to d+1 if 'd' is odd.
roundUp = dfrac > 1<<(extra-1) ||
(dfrac == 1<<(extra-1) && !d0) ||
(dfrac == 1<<(extra-1) && d0 && di&1 == 1)
} else {
// otherwise, d+1/2 always rounds up because
// we truncated below.
roundUp = dfrac>>(extra-1) == 1
}
if dfrac != 0 {
d0 = false
}
// Proceed to the requested number of digits
formatDecimal(d, di, !d0, roundUp, prec)
// Adjust exponent
d.dp -= q
}
// formatDecimal fills d with at most prec decimal digits
// of mantissa m. The boolean trunc indicates whether m
// is truncated compared to the original number being formatted.
func formatDecimal(d *decimalSlice, m uint64, trunc bool, roundUp bool, prec int) {
max := uint64pow10[prec]
trimmed := 0
for m >= max {
a, b := m/10, m%10
m = a
trimmed++
if b > 5 {
roundUp = true
} else if b < 5 {
roundUp = false
} else { // b == 5
// round up if there are trailing digits,
// or if the new value of m is odd (round-to-even convention)
roundUp = trunc || m&1 == 1
}
if b != 0 {
trunc = true
}
}
if roundUp {
m++
}
if m >= max {
// Happens if di was originally 99999....xx
m /= 10
trimmed++
}
// render digits (similar to formatBits)
n := uint(prec)
d.nd = int(prec)
v := m
for v >= 100 {
var v1, v2 uint64
if v>>32 == 0 {
v1, v2 = uint64(uint32(v)/100), uint64(uint32(v)%100)
} else {
v1, v2 = v/100, v%100
}
n -= 2
d.d[n+1] = smallsString[2*v2+1]
d.d[n+0] = smallsString[2*v2+0]
v = v1
}
if v > 0 {
n--
d.d[n] = smallsString[2*v+1]
}
if v >= 10 {
n--
d.d[n] = smallsString[2*v]
}
for d.d[d.nd-1] == '0' {
d.nd--
trimmed++
}
d.dp = d.nd + trimmed
}
// mulByLog2Log10 returns math.Floor(x * log(2)/log(10)) for an integer x in
// the range -1600 <= x && x <= +1600.
//
// The range restriction lets us work in faster integer arithmetic instead of
// slower floating point arithmetic. Correctness is verified by unit tests.
func mulByLog2Log10(x int) int {
// log(2)/log(10) ≈ 0.30102999566 ≈ 78913 / 2^18
return (x * 78913) >> 18
}
// mulByLog10Log2 returns math.Floor(x * log(10)/log(2)) for an integer x in
// the range -500 <= x && x <= +500.
//
// The range restriction lets us work in faster integer arithmetic instead of
// slower floating point arithmetic. Correctness is verified by unit tests.
func mulByLog10Log2(x int) int {
// log(10)/log(2) ≈ 3.32192809489 ≈ 108853 / 2^15
return (x * 108853) >> 15
}
// mult64bitPow10 takes a floating-point input with a 25-bit
// mantissa and multiplies it with 10^q. The resulting mantissa
// is m*P >> 57 where P is a 64-bit element of the detailedPowersOfTen tables.
// It is typically 31 or 32-bit wide.
// The returned boolean is true if all trimmed bits were zero.
//
// That is:
// m*2^e2 * round(10^q) = resM * 2^resE + ε
// exact = ε == 0
func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
if q == 0 {
return m << 7, e2 - 7, true
}
if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
// This never happens due to the range of float32/float64 exponent
panic("mult64bitPow10: power of 10 is out of range")
}
pow := detailedPowersOfTen[q-detailedPowersOfTenMinExp10][1]
if q < 0 {
// Inverse powers of ten must be rounded up.
pow += 1
}
hi, lo := bits.Mul64(uint64(m), pow)
e2 += mulByLog10Log2(q) - 63 + 57
return uint32(hi<<7 | lo>>57), e2, lo<<7 == 0
}
// mult128bitPow10 takes a floating-point input with a 55-bit
// mantissa and multiplies it with 10^q. The resulting mantissa
// is m*P >> 119 where P is a 128-bit element of the detailedPowersOfTen tables.
// It is typically 63 or 64-bit wide.
// The returned boolean is true is all trimmed bits were zero.
//
// That is:
// m*2^e2 * round(10^q) = resM * 2^resE + ε
// exact = ε == 0
func mult128bitPow10(m uint64, e2, q int) (resM uint64, resE int, exact bool) {
if q == 0 {
return m << 9, e2 - 9, true
}
if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
// This never happens due to the range of float32/float64 exponent
panic("mult128bitPow10: power of 10 is out of range")
}
pow := detailedPowersOfTen[q-detailedPowersOfTenMinExp10]
if q < 0 {
// Inverse powers of ten must be rounded up.
pow[0] += 1
}
e2 += mulByLog10Log2(q) - 127 + 119
// long multiplication
l1, l0 := bits.Mul64(m, pow[0])
h1, h0 := bits.Mul64(m, pow[1])
mid, carry := bits.Add64(l1, h0, 0)
h1 += carry
return h1<<9 | mid>>55, e2, mid<<9 == 0 && l0 == 0
}
func divisibleByPower5(m uint64, k int) bool {
if m == 0 {
return true
}
for i := 0; i < k; i++ {
if m%5 != 0 {
return false
}
m /= 5
}
return true
}

View file

@ -0,0 +1,31 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strconv_test
import (
"math"
. "strconv"
"testing"
)
func TestMulByLog2Log10(t *testing.T) {
for x := -1600; x <= +1600; x++ {
iMath := MulByLog2Log10(x)
fMath := int(math.Floor(float64(x) * math.Ln2 / math.Ln10))
if iMath != fMath {
t.Errorf("mulByLog2Log10(%d) failed: %d vs %d\n", x, iMath, fMath)
}
}
}
func TestMulByLog10Log2(t *testing.T) {
for x := -500; x <= +500; x++ {
iMath := MulByLog10Log2(x)
fMath := int(math.Floor(float64(x) * math.Ln10 / math.Ln2))
if iMath != fMath {
t.Errorf("mulByLog10Log2(%d) failed: %d vs %d\n", x, iMath, fMath)
}
}
}

View file

@ -21,3 +21,11 @@ func SetOptimize(b bool) bool {
func ParseFloatPrefix(s string, bitSize int) (float64, int, error) {
return parseFloatPrefix(s, bitSize)
}
func MulByLog2Log10(x int) int {
return mulByLog2Log10(x)
}
func MulByLog10Log2(x int) int {
return mulByLog10Log2(x)
}