mirror of
https://github.com/golang/go
synced 2024-10-06 16:10:03 +00:00
text/scanner: don't liberally consume (invalid) floats or underbars
This is a follow-up on https://golang.org/cl/161199 which introduced the new Go 2 number literals to text/scanner. That change introduced a bug by allowing decimal and hexadecimal floats to be consumed even if the scanner was not configured to accept floats. This CL changes the code to not consume a radix dot '.' or exponent unless the scanner is configured to accept floats. This CL also introduces a new mode "AllowNumberbars" which controls whether underbars '_' are permitted as digit separators in numbers or not. There is a possibility that we may need to refine text/scanner further (e.g., the Float mode now includes hexadecimal floats which it didn't recognize before). We're very early in the cycle, so let's see how it goes. RELNOTE=yes Updates #12711. Updates #19308. Updates #28493. Updates #29008. Fixes #30320. Change-Id: I6481d314f0384e09ef6803ffad38dc529b1e89a3 Reviewed-on: https://go-review.googlesource.com/c/163079 Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
153c0da89b
commit
34fb5855eb
|
@ -457,3 +457,4 @@ pkg syscall (freebsd-arm-cgo), type Stat_t struct, Nlink uint16
|
|||
pkg syscall (freebsd-arm-cgo), type Stat_t struct, Rdev uint32
|
||||
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8
|
||||
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8
|
||||
pkg text/scanner, const GoTokens = 1012
|
|
@ -0,0 +1,3 @@
|
|||
pkg text/scanner, const AllowNumberbars = 1024
|
||||
pkg text/scanner, const AllowNumberbars ideal-int
|
||||
pkg text/scanner, const GoTokens = 2036
|
|
@ -59,15 +59,16 @@ func (pos Position) String() string {
|
|||
// "foo" is scanned as the token sequence '"' Ident '"'.
|
||||
//
|
||||
const (
|
||||
ScanIdents = 1 << -Ident
|
||||
ScanInts = 1 << -Int
|
||||
ScanFloats = 1 << -Float // includes Ints
|
||||
ScanChars = 1 << -Char
|
||||
ScanStrings = 1 << -String
|
||||
ScanRawStrings = 1 << -RawString
|
||||
ScanComments = 1 << -Comment
|
||||
SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space
|
||||
GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
|
||||
ScanIdents = 1 << -Ident
|
||||
ScanInts = 1 << -Int
|
||||
ScanFloats = 1 << -Float // includes Ints and hexadecimal floats
|
||||
ScanChars = 1 << -Char
|
||||
ScanStrings = 1 << -String
|
||||
ScanRawStrings = 1 << -RawString
|
||||
ScanComments = 1 << -Comment
|
||||
SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space
|
||||
AllowNumberbars = 1 << -allowNumberbars // if set, number literals may contain underbars as digit separators
|
||||
GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments | AllowNumberbars
|
||||
)
|
||||
|
||||
// The result of Scan is one of these tokens or a Unicode character.
|
||||
|
@ -80,7 +81,10 @@ const (
|
|||
String
|
||||
RawString
|
||||
Comment
|
||||
|
||||
// internal use only
|
||||
skipComment
|
||||
allowNumberbars
|
||||
)
|
||||
|
||||
var tokenString = map[rune]string{
|
||||
|
@ -359,7 +363,8 @@ func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case c
|
|||
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
|
||||
func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
|
||||
|
||||
// digits accepts the sequence { digit | '_' } starting with ch0.
|
||||
// digits accepts the sequence { digit } (if AllowNumberbars is not set)
|
||||
// or { digit | '_' } (if AllowNumberbars is set), starting with ch0.
|
||||
// If base <= 10, digits accepts any decimal digit but records
|
||||
// the first invalid digit >= base in *invalid if *invalid == 0.
|
||||
// digits returns the first rune that is not part of the sequence
|
||||
|
@ -369,7 +374,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
|
|||
ch = ch0
|
||||
if base <= 10 {
|
||||
max := rune('0' + base)
|
||||
for isDecimal(ch) || ch == '_' {
|
||||
for isDecimal(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
|
||||
ds := 1
|
||||
if ch == '_' {
|
||||
ds = 2
|
||||
|
@ -380,7 +385,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
|
|||
ch = s.next()
|
||||
}
|
||||
} else {
|
||||
for isHex(ch) || ch == '_' {
|
||||
for isHex(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
|
||||
ds := 1
|
||||
if ch == '_' {
|
||||
ds = 2
|
||||
|
@ -392,7 +397,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
|
|||
return
|
||||
}
|
||||
|
||||
func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
|
||||
func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) {
|
||||
base := 10 // number base
|
||||
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
|
||||
digsep := 0 // bit 0: digit present, bit 1: '_' present
|
||||
|
@ -401,7 +406,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
|
|||
// integer part
|
||||
var tok rune
|
||||
var ds int
|
||||
if integerPart {
|
||||
if !seenDot {
|
||||
tok = Int
|
||||
if ch == '0' {
|
||||
ch = s.next()
|
||||
|
@ -422,17 +427,18 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
|
|||
}
|
||||
ch, ds = s.digits(ch, base, &invalid)
|
||||
digsep |= ds
|
||||
if ch == '.' && s.Mode&ScanFloats != 0 {
|
||||
ch = s.next()
|
||||
seenDot = true
|
||||
}
|
||||
}
|
||||
|
||||
// fractional part
|
||||
if !integerPart || ch == '.' {
|
||||
if seenDot {
|
||||
tok = Float
|
||||
if prefix == 'o' || prefix == 'b' {
|
||||
s.error("invalid radix point in " + litname(prefix))
|
||||
}
|
||||
if ch == '.' {
|
||||
ch = s.next()
|
||||
}
|
||||
ch, ds = s.digits(ch, base, &invalid)
|
||||
digsep |= ds
|
||||
}
|
||||
|
@ -442,7 +448,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
|
|||
}
|
||||
|
||||
// exponent
|
||||
if e := lower(ch); e == 'e' || e == 'p' {
|
||||
if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 {
|
||||
switch {
|
||||
case e == 'e' && prefix != 0 && prefix != '0':
|
||||
s.errorf("%q exponent requires decimal mantissa", ch)
|
||||
|
@ -682,7 +688,7 @@ redo:
|
|||
}
|
||||
case isDecimal(ch):
|
||||
if s.Mode&(ScanInts|ScanFloats) != 0 {
|
||||
tok, ch = s.scanNumber(ch, true)
|
||||
tok, ch = s.scanNumber(ch, false)
|
||||
} else {
|
||||
ch = s.next()
|
||||
}
|
||||
|
@ -705,7 +711,7 @@ redo:
|
|||
case '.':
|
||||
ch = s.next()
|
||||
if isDecimal(ch) && s.Mode&ScanFloats != 0 {
|
||||
tok, ch = s.scanNumber(ch, false)
|
||||
tok, ch = s.scanNumber(ch, true)
|
||||
}
|
||||
case '/':
|
||||
ch = s.next()
|
||||
|
|
|
@ -877,3 +877,40 @@ func TestNumbers(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIssue30320(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
in, want string
|
||||
mode uint
|
||||
}{
|
||||
{"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
|
||||
{"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
|
||||
{"xxx1e0yyy", "1 0", ScanInts},
|
||||
{"1_2", "1 2", ScanInts}, // don't consume _ as part of a number if not explicitly enabled
|
||||
{"1_2", "1_2", ScanInts | AllowNumberbars},
|
||||
{"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
|
||||
{"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
|
||||
} {
|
||||
got := extractInts(test.in, test.mode)
|
||||
if got != test.want {
|
||||
t.Errorf("%q: got %q; want %q", test.in, got, test.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func extractInts(t string, mode uint) (res string) {
|
||||
var s Scanner
|
||||
s.Init(strings.NewReader(t))
|
||||
s.Mode = mode
|
||||
for {
|
||||
switch tok := s.Scan(); tok {
|
||||
case Int, Float:
|
||||
if len(res) > 0 {
|
||||
res += " "
|
||||
}
|
||||
res += s.TokenText()
|
||||
case EOF:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue