cmd/gc: fix float32 const conversion and printing of big float consts

The float32 const conversion used to round to float64 and then use the hardware to round to float32. Even though there was a range check before this conversion, the double rounding introduced inaccuracy: the round to float64 might round the value further away from the float32 range, reaching a float64 value that could not actually be rounded to float32. The hardware appears to give us 0 in that case, but it is probably undefined. Double rounding also meant that the wrong value might be used for certain border cases. Do the rounding the float32 ourselves, just as we already did the rounding to float64. This makes the conversion precise and also makes the conversion match the range check. Finally, add some code to print very large (bigger than float64) floating point constants in decimal floating point notation instead of falling back to the precise but human-unreadable binary floating point notation. Fixes #8015. LGTM=iant R=golang-codereviews, iant CC=golang-codereviews, r https://golang.org/cl/100580044
2024-10-02 22:25:08 +00:00 · 2014-05-19 22:57:59 -04:00 · 2014-05-19 22:57:59 -04:00 · 60be4a2450
parent 661298358c
commit 60be4a2450
6 changed files with 128 additions and 26 deletions
--- a/src/cmd/gc/const.c
+++ b/src/cmd/gc/const.c
@ -22,19 +22,22 @@ Mpflt*
 truncfltlit(Mpflt *oldv, Type *t)
 {
 	double d;
-	float f;
 	Mpflt *fv;
+	Val v;

 	if(t == T)
 		return oldv;

+	memset(&v, 0, sizeof v);
+	v.ctype = CTFLT;
+	v.u.fval = oldv;
+	overflow(v, t);
+
 	fv = mal(sizeof *fv);
 	*fv = *oldv;

 	// convert large precision literal floating
 	// into limited precision (float64 or float32)
-	// botch -- this assumes that compiler fp
-	//    has same precision as runtime fp
 	switch(t->etype) {
 	case TFLOAT64:
 		d = mpgetflt(fv);
@ -42,10 +45,9 @@ truncfltlit(Mpflt *oldv, Type *t)
 		break;

 	case TFLOAT32:
-		d = mpgetflt(fv);
-		f = d;
-		d = f;
+		d = mpgetflt32(fv);
 		mpmovecflt(fv, d);
+
 		break;
 	}
 	return fv;
@ -235,7 +237,6 @@ convlit1(Node **np, Type *t, int explicit)
 				n->val = toflt(n->val);
 				// flowthrough
 			case CTFLT:
-				overflow(n->val, t);
 				n->val.u.fval = truncfltlit(n->val.u.fval, t);
 				break;
 			}
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@ -1254,6 +1254,7 @@ void	mpxorfixfix(Mpint *a, Mpint *b);
 void	mpaddfltflt(Mpflt *a, Mpflt *b);
 void	mpdivfltflt(Mpflt *a, Mpflt *b);
 double	mpgetflt(Mpflt *a);
+double	mpgetflt32(Mpflt *a);
 void	mpmovecflt(Mpflt *a, double c);
 void	mpmulfltflt(Mpflt *a, Mpflt *b);
 void	mpnegflt(Mpflt *a);
--- a/src/cmd/gc/mparith1.c
+++ b/src/cmd/gc/mparith1.c
@ -579,20 +579,40 @@ Fconv(Fmt *fp)
 {
 	char buf[500];
 	Mpflt *fvp, fv;
-	double d;
+	double d, dexp;
+	int exp;

 	fvp = va_arg(fp->args, Mpflt*);
 	if(fp->flags & FmtSharp) {
 		// alternate form - decimal for error messages.
 		// for well in range, convert to double and use print's %g
-		if(-900 < fvp->exp && fvp->exp < 900) {
+		exp = fvp->exp + sigfig(fvp)*Mpscale;
+		if(-900 < exp && exp < 900) {
 			d = mpgetflt(fvp);
 			if(d >= 0 && (fp->flags & FmtSign))
 				fmtprint(fp, "+");
-			return fmtprint(fp, "%g", d);
+			return fmtprint(fp, "%g", d, exp, fvp);
 		}
-		// TODO(rsc): for well out of range, print
-		// an approximation like 1.234e1000
+		
+		// very out of range. compute decimal approximation by hand.
+		// decimal exponent
+		dexp = fvp->exp * 0.301029995663981195; // log_10(2)
+		exp = (int)dexp;
+		// decimal mantissa
+		fv = *fvp;
+		fv.val.neg = 0;
+		fv.exp = 0;
+		d = mpgetflt(&fv);
+		d *= pow(10, dexp-exp);
+		while(d >= 9.99995) {
+			d /= 10;
+			exp++;
+		}
+		if(fvp->val.neg)
+			fmtprint(fp, "-");
+		else if(fp->flags & FmtSign)
+			fmtprint(fp, "+");
+		return fmtprint(fp, "%.5fe+%d", d, exp);
 	}

 	if(sigfig(fvp) == 0) {
--- a/src/cmd/gc/mparith3.c
+++ b/src/cmd/gc/mparith3.c
@ -199,10 +199,10 @@ mpdivfltflt(Mpflt *a, Mpflt *b)
 		print(" = %F\n\n", a);
 }

-double
-mpgetflt(Mpflt *a)
+static double
+mpgetfltN(Mpflt *a, int prec, int bias)
 {
-	int s, i, e;
+	int s, i, e, minexp;
 	uvlong v, vm;
 	double f;

@ -226,12 +226,8 @@ mpgetflt(Mpflt *a)
 			return 0;
 	}

-	// the magic numbers (64, 63, 53, 10, -1074) are
-	// IEEE specific. this should be done machine
-	// independently or in the 6g half of the compiler
-
 	// pick up the mantissa and a rounding bit in a uvlong
-	s = 53+1;
+	s = prec+1;
 	v = 0;
 	for(i=Mpnorm-1; s>=Mpscale; i--) {
 		v = (v<<Mpscale) | a->val.a[i];
@ -251,14 +247,15 @@ mpgetflt(Mpflt *a)
 		v = (v<<s) | (a->val.a[i]>>(Mpscale-s));

 	// gradual underflow
-	e = Mpnorm*Mpscale + a->exp - 53;
-	if(e < -1074) {
-		s = -e - 1074;
-		if(s > 54)
-			s = 54;
+	e = Mpnorm*Mpscale + a->exp - prec;
+	minexp = bias+1-prec+1;
+	if(e < minexp) {
+		s = minexp - e;
+		if(s > prec+1)
+			s = prec+1;
 		v |= vm & ((1ULL<<s) - 1);
 		vm >>= s;
-		e = -1074;
+		e = minexp;
 	}

 //print("vm=%.16llux v=%.16llux\n", vm, v);
@ -276,6 +273,18 @@ mpgetflt(Mpflt *a)
 	return f;
 }

+double
+mpgetflt(Mpflt *a)
+{
+	return mpgetfltN(a, 53, -1023);
+}
+
+double
+mpgetflt32(Mpflt *a)
+{
+	return mpgetfltN(a, 24, -127);
+}
+
 void
 mpmovecflt(Mpflt *a, double c)
 {
--- a/test/float_lit2.go
+++ b/test/float_lit2.go
@ -0,0 +1,43 @@
+// run
+
+// Check conversion of constant to float32/float64 near min/max boundaries.
+
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"fmt"
+)
+
+var cvt = []struct {
+	val    interface{}
+	binary string
+}{
+	{float32(-340282356779733661637539395458142568447), "-16777215p+104"},
+	{float32(-340282326356119256160033759537265639424), "-16777214p+104"},
+	{float32(340282326356119256160033759537265639424), "16777214p+104"},
+	{float32(340282356779733661637539395458142568447), "16777215p+104"},
+	{float64(-1.797693134862315807937289714053e+308), "-9007199254740991p+971"},
+	{float64(-1.797693134862315708145274237317e+308), "-9007199254740991p+971"},
+	{float64(-1.797693134862315608353258760581e+308), "-9007199254740990p+971"},
+	{float64(1.797693134862315608353258760581e+308), "9007199254740990p+971"},
+	{float64(1.797693134862315708145274237317e+308), "9007199254740991p+971"},
+	{float64(1.797693134862315807937289714053e+308), "9007199254740991p+971"},
+}
+
+func main() {
+	bug := false
+	for i, c := range cvt {
+		s := fmt.Sprintf("%b", c.val)
+		if s != c.binary {
+			if !bug {
+				bug = true
+				fmt.Println("BUG")
+			}
+			fmt.Printf("#%d: have %s, want %s\n", i, s, c.binary)
+		}
+	}
+}
--- a/test/float_lit3.go
+++ b/test/float_lit3.go
@ -0,0 +1,28 @@
+// errorcheck
+
+// Check flagging of invalid conversion of constant to float32/float64 near min/max boundaries.
+
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+var x = []interface{}{
+	float32(-340282356779733661637539395458142568448), // ERROR "constant -3\.40282e\+38 overflows float32"
+	float32(-340282356779733661637539395458142568447),
+	float32(-340282326356119256160033759537265639424),
+	float32(340282326356119256160033759537265639424),
+	float32(340282356779733661637539395458142568447),
+	float32(340282356779733661637539395458142568448), // ERROR "constant 3\.40282e\+38 overflows float32"
+	-1e1000, // ERROR "constant -1\.00000e\+1000 overflows float64"
+	float64(-1.797693134862315907937289714053e+308), // ERROR "constant -1\.79769e\+308 overflows float64"
+	float64(-1.797693134862315807937289714053e+308),
+	float64(-1.797693134862315708145274237317e+308),
+	float64(-1.797693134862315608353258760581e+308),
+	float64(1.797693134862315608353258760581e+308),
+	float64(1.797693134862315708145274237317e+308),
+	float64(1.797693134862315807937289714053e+308),
+	float64(1.797693134862315907937289714053e+308), // ERROR "constant 1\.79769e\+308 overflows float64"
+	1e1000, // ERROR "constant 1\.00000e\+1000 overflows float64"
+}