cmd/compile/internal/syntax: allow more than one rune "unread"

Make it possible to "unread" more than one byte before the most
recently read rune. Use a better name than ungetr2 and make it
slightly more efficient.

R=Go1.13

Change-Id: I45d5dfa11e508259a972ca6560d1f78d7a51fe15
Reviewed-on: https://go-review.googlesource.com/c/158957
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
Robert Griesemer 2019-01-22 15:20:50 -08:00
parent 701f6a27b5
commit 58365b34bb
2 changed files with 32 additions and 23 deletions

View file

@ -150,7 +150,7 @@ redo:
case '.':
c = s.getr()
if isDigit(c) {
s.ungetr2()
s.unread(1)
s.number('.')
break
}
@ -160,7 +160,7 @@ redo:
s.tok = _DotDotDot
break
}
s.ungetr2()
s.unread(1)
}
s.ungetr()
s.tok = _Dot

View file

@ -22,6 +22,9 @@ import (
const linebase = 1
const colbase = 1
// max. number of bytes to unread
const maxunread = 10
// buf [...read...|...|...unread...|s|...free...]
// ^ ^ ^ ^
// | | | |
@ -59,20 +62,21 @@ func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) {
s.suf = -1
}
// ungetr ungets the most recently read rune.
// ungetr sets the reading position to a previous reading
// position, usually the one of the most recently read
// rune, but possibly earlier (see unread below).
func (s *source) ungetr() {
s.r, s.line, s.col = s.r0, s.line0, s.col0
}
// ungetr2 is like ungetr but enables a 2nd ungetr.
// It must not be called if one of the runes seen
// was a newline or had a UTF-8 encoding longer than
// 1 byte.
func (s *source) ungetr2() {
s.ungetr()
// line must not have changed
s.r0--
s.col0--
// unread moves the previous reading position to a position
// that is n bytes earlier in the source. The next ungetr
// call will set the reading position to that moved position.
// The "unread" runes must be single byte and not contain any
// newlines; and 0 <= n <= maxunread must hold.
func (s *source) unread(n int) {
s.r0 -= n
s.col0 -= uint(n)
}
func (s *source) error(msg string) {
@ -142,7 +146,7 @@ redo:
// BOM's are only allowed as the first character in a file
const BOM = 0xfeff
if r == BOM {
if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to maxunread)
s.error("invalid BOM in the middle of the file")
}
goto redo
@ -153,20 +157,25 @@ redo:
func (s *source) fill() {
// Slide unread bytes to beginning but preserve last read char
// (for one ungetr call) plus one extra byte (for a 2nd ungetr
// call, only for ".." character sequence and float literals
// starting with ".").
if s.r0 > 1 {
// (for one ungetr call) plus maxunread extra bytes (for one
// unread call).
if s.r0 > maxunread {
n := s.r0 - maxunread // number of bytes to slide down
// save literal prefix, if any
// (We see at most one ungetr call while reading
// a literal, so make sure s.r0 remains in buf.)
// (make sure we keep maxunread bytes and the last
// read char in the buffer)
if s.suf >= 0 {
s.lit = append(s.lit, s.buf[s.suf:s.r0]...)
s.suf = 1 // == s.r0 after slide below
// we have a literal
if s.suf < n {
// save literal prefix
s.lit = append(s.lit, s.buf[s.suf:n]...)
s.suf = 0
} else {
s.suf -= n
}
}
n := s.r0 - 1
copy(s.buf[:], s.buf[n:s.w])
s.r0 = 1 // eqv: s.r0 -= n
s.r0 = maxunread // eqv: s.r0 -= n
s.r -= n
s.w -= n
}