mirror of
https://github.com/golang/go
synced 2024-10-02 22:25:08 +00:00
cmd/gc: fix float32 const conversion and printing of big float consts
The float32 const conversion used to round to float64 and then use the hardware to round to float32. Even though there was a range check before this conversion, the double rounding introduced inaccuracy: the round to float64 might round the value further away from the float32 range, reaching a float64 value that could not actually be rounded to float32. The hardware appears to give us 0 in that case, but it is probably undefined. Double rounding also meant that the wrong value might be used for certain border cases. Do the rounding the float32 ourselves, just as we already did the rounding to float64. This makes the conversion precise and also makes the conversion match the range check. Finally, add some code to print very large (bigger than float64) floating point constants in decimal floating point notation instead of falling back to the precise but human-unreadable binary floating point notation. Fixes #8015. LGTM=iant R=golang-codereviews, iant CC=golang-codereviews, r https://golang.org/cl/100580044
This commit is contained in:
parent
661298358c
commit
60be4a2450
|
@ -22,19 +22,22 @@ Mpflt*
|
|||
truncfltlit(Mpflt *oldv, Type *t)
|
||||
{
|
||||
double d;
|
||||
float f;
|
||||
Mpflt *fv;
|
||||
Val v;
|
||||
|
||||
if(t == T)
|
||||
return oldv;
|
||||
|
||||
memset(&v, 0, sizeof v);
|
||||
v.ctype = CTFLT;
|
||||
v.u.fval = oldv;
|
||||
overflow(v, t);
|
||||
|
||||
fv = mal(sizeof *fv);
|
||||
*fv = *oldv;
|
||||
|
||||
// convert large precision literal floating
|
||||
// into limited precision (float64 or float32)
|
||||
// botch -- this assumes that compiler fp
|
||||
// has same precision as runtime fp
|
||||
switch(t->etype) {
|
||||
case TFLOAT64:
|
||||
d = mpgetflt(fv);
|
||||
|
@ -42,10 +45,9 @@ truncfltlit(Mpflt *oldv, Type *t)
|
|||
break;
|
||||
|
||||
case TFLOAT32:
|
||||
d = mpgetflt(fv);
|
||||
f = d;
|
||||
d = f;
|
||||
d = mpgetflt32(fv);
|
||||
mpmovecflt(fv, d);
|
||||
|
||||
break;
|
||||
}
|
||||
return fv;
|
||||
|
@ -235,7 +237,6 @@ convlit1(Node **np, Type *t, int explicit)
|
|||
n->val = toflt(n->val);
|
||||
// flowthrough
|
||||
case CTFLT:
|
||||
overflow(n->val, t);
|
||||
n->val.u.fval = truncfltlit(n->val.u.fval, t);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1254,6 +1254,7 @@ void mpxorfixfix(Mpint *a, Mpint *b);
|
|||
void mpaddfltflt(Mpflt *a, Mpflt *b);
|
||||
void mpdivfltflt(Mpflt *a, Mpflt *b);
|
||||
double mpgetflt(Mpflt *a);
|
||||
double mpgetflt32(Mpflt *a);
|
||||
void mpmovecflt(Mpflt *a, double c);
|
||||
void mpmulfltflt(Mpflt *a, Mpflt *b);
|
||||
void mpnegflt(Mpflt *a);
|
||||
|
|
|
@ -579,20 +579,40 @@ Fconv(Fmt *fp)
|
|||
{
|
||||
char buf[500];
|
||||
Mpflt *fvp, fv;
|
||||
double d;
|
||||
double d, dexp;
|
||||
int exp;
|
||||
|
||||
fvp = va_arg(fp->args, Mpflt*);
|
||||
if(fp->flags & FmtSharp) {
|
||||
// alternate form - decimal for error messages.
|
||||
// for well in range, convert to double and use print's %g
|
||||
if(-900 < fvp->exp && fvp->exp < 900) {
|
||||
exp = fvp->exp + sigfig(fvp)*Mpscale;
|
||||
if(-900 < exp && exp < 900) {
|
||||
d = mpgetflt(fvp);
|
||||
if(d >= 0 && (fp->flags & FmtSign))
|
||||
fmtprint(fp, "+");
|
||||
return fmtprint(fp, "%g", d);
|
||||
return fmtprint(fp, "%g", d, exp, fvp);
|
||||
}
|
||||
// TODO(rsc): for well out of range, print
|
||||
// an approximation like 1.234e1000
|
||||
|
||||
// very out of range. compute decimal approximation by hand.
|
||||
// decimal exponent
|
||||
dexp = fvp->exp * 0.301029995663981195; // log_10(2)
|
||||
exp = (int)dexp;
|
||||
// decimal mantissa
|
||||
fv = *fvp;
|
||||
fv.val.neg = 0;
|
||||
fv.exp = 0;
|
||||
d = mpgetflt(&fv);
|
||||
d *= pow(10, dexp-exp);
|
||||
while(d >= 9.99995) {
|
||||
d /= 10;
|
||||
exp++;
|
||||
}
|
||||
if(fvp->val.neg)
|
||||
fmtprint(fp, "-");
|
||||
else if(fp->flags & FmtSign)
|
||||
fmtprint(fp, "+");
|
||||
return fmtprint(fp, "%.5fe+%d", d, exp);
|
||||
}
|
||||
|
||||
if(sigfig(fvp) == 0) {
|
||||
|
|
|
@ -199,10 +199,10 @@ mpdivfltflt(Mpflt *a, Mpflt *b)
|
|||
print(" = %F\n\n", a);
|
||||
}
|
||||
|
||||
double
|
||||
mpgetflt(Mpflt *a)
|
||||
static double
|
||||
mpgetfltN(Mpflt *a, int prec, int bias)
|
||||
{
|
||||
int s, i, e;
|
||||
int s, i, e, minexp;
|
||||
uvlong v, vm;
|
||||
double f;
|
||||
|
||||
|
@ -226,12 +226,8 @@ mpgetflt(Mpflt *a)
|
|||
return 0;
|
||||
}
|
||||
|
||||
// the magic numbers (64, 63, 53, 10, -1074) are
|
||||
// IEEE specific. this should be done machine
|
||||
// independently or in the 6g half of the compiler
|
||||
|
||||
// pick up the mantissa and a rounding bit in a uvlong
|
||||
s = 53+1;
|
||||
s = prec+1;
|
||||
v = 0;
|
||||
for(i=Mpnorm-1; s>=Mpscale; i--) {
|
||||
v = (v<<Mpscale) | a->val.a[i];
|
||||
|
@ -251,14 +247,15 @@ mpgetflt(Mpflt *a)
|
|||
v = (v<<s) | (a->val.a[i]>>(Mpscale-s));
|
||||
|
||||
// gradual underflow
|
||||
e = Mpnorm*Mpscale + a->exp - 53;
|
||||
if(e < -1074) {
|
||||
s = -e - 1074;
|
||||
if(s > 54)
|
||||
s = 54;
|
||||
e = Mpnorm*Mpscale + a->exp - prec;
|
||||
minexp = bias+1-prec+1;
|
||||
if(e < minexp) {
|
||||
s = minexp - e;
|
||||
if(s > prec+1)
|
||||
s = prec+1;
|
||||
v |= vm & ((1ULL<<s) - 1);
|
||||
vm >>= s;
|
||||
e = -1074;
|
||||
e = minexp;
|
||||
}
|
||||
|
||||
//print("vm=%.16llux v=%.16llux\n", vm, v);
|
||||
|
@ -276,6 +273,18 @@ mpgetflt(Mpflt *a)
|
|||
return f;
|
||||
}
|
||||
|
||||
double
|
||||
mpgetflt(Mpflt *a)
|
||||
{
|
||||
return mpgetfltN(a, 53, -1023);
|
||||
}
|
||||
|
||||
double
|
||||
mpgetflt32(Mpflt *a)
|
||||
{
|
||||
return mpgetfltN(a, 24, -127);
|
||||
}
|
||||
|
||||
void
|
||||
mpmovecflt(Mpflt *a, double c)
|
||||
{
|
||||
|
|
43
test/float_lit2.go
Normal file
43
test/float_lit2.go
Normal file
|
@ -0,0 +1,43 @@
|
|||
// run
|
||||
|
||||
// Check conversion of constant to float32/float64 near min/max boundaries.
|
||||
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
var cvt = []struct {
|
||||
val interface{}
|
||||
binary string
|
||||
}{
|
||||
{float32(-340282356779733661637539395458142568447), "-16777215p+104"},
|
||||
{float32(-340282326356119256160033759537265639424), "-16777214p+104"},
|
||||
{float32(340282326356119256160033759537265639424), "16777214p+104"},
|
||||
{float32(340282356779733661637539395458142568447), "16777215p+104"},
|
||||
{float64(-1.797693134862315807937289714053e+308), "-9007199254740991p+971"},
|
||||
{float64(-1.797693134862315708145274237317e+308), "-9007199254740991p+971"},
|
||||
{float64(-1.797693134862315608353258760581e+308), "-9007199254740990p+971"},
|
||||
{float64(1.797693134862315608353258760581e+308), "9007199254740990p+971"},
|
||||
{float64(1.797693134862315708145274237317e+308), "9007199254740991p+971"},
|
||||
{float64(1.797693134862315807937289714053e+308), "9007199254740991p+971"},
|
||||
}
|
||||
|
||||
func main() {
|
||||
bug := false
|
||||
for i, c := range cvt {
|
||||
s := fmt.Sprintf("%b", c.val)
|
||||
if s != c.binary {
|
||||
if !bug {
|
||||
bug = true
|
||||
fmt.Println("BUG")
|
||||
}
|
||||
fmt.Printf("#%d: have %s, want %s\n", i, s, c.binary)
|
||||
}
|
||||
}
|
||||
}
|
28
test/float_lit3.go
Normal file
28
test/float_lit3.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
// errorcheck
|
||||
|
||||
// Check flagging of invalid conversion of constant to float32/float64 near min/max boundaries.
|
||||
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
var x = []interface{}{
|
||||
float32(-340282356779733661637539395458142568448), // ERROR "constant -3\.40282e\+38 overflows float32"
|
||||
float32(-340282356779733661637539395458142568447),
|
||||
float32(-340282326356119256160033759537265639424),
|
||||
float32(340282326356119256160033759537265639424),
|
||||
float32(340282356779733661637539395458142568447),
|
||||
float32(340282356779733661637539395458142568448), // ERROR "constant 3\.40282e\+38 overflows float32"
|
||||
-1e1000, // ERROR "constant -1\.00000e\+1000 overflows float64"
|
||||
float64(-1.797693134862315907937289714053e+308), // ERROR "constant -1\.79769e\+308 overflows float64"
|
||||
float64(-1.797693134862315807937289714053e+308),
|
||||
float64(-1.797693134862315708145274237317e+308),
|
||||
float64(-1.797693134862315608353258760581e+308),
|
||||
float64(1.797693134862315608353258760581e+308),
|
||||
float64(1.797693134862315708145274237317e+308),
|
||||
float64(1.797693134862315807937289714053e+308),
|
||||
float64(1.797693134862315907937289714053e+308), // ERROR "constant 1\.79769e\+308 overflows float64"
|
||||
1e1000, // ERROR "constant 1\.00000e\+1000 overflows float64"
|
||||
}
|
Loading…
Reference in a new issue