text/template/parse: simplify I/O in lexing

The concurrent model for delivering tokens was fine for pedagogy,
but has caused a few problems as the package has evolved (that is,
got more complicated). It's easy to eliminate it, simplifying or
removing some of the hacks used to work around these problems.

The old lexer would deliver tokens over a channel to the parsing
goroutine, and continue running until EOF. In this rewrite, we
instead run the machine until a token is ready, and shut it down
until the next token is needed. The mechanism is just to return nil
as the state function, which requires a bit more threading of return
values through the state functions but is not difficult. The change
is modest.

A couple of error messages change, but otherwise the change has no
external effect. This is just an internal cleanup, long overdue.

benchmark                      old ns/op     new ns/op     delta
BenchmarkParseLarge-20         12222729      6769966       -44.61%
BenchmarkVariableString-20     73.5          73.4          -0.16%
BenchmarkListString-20         1827          1841          +0.77%

benchmark                      old allocs     new allocs     delta
BenchmarkVariableString-20     3              3              +0.00%
BenchmarkListString-20         31             31             +0.00%

benchmark                      old bytes     new bytes     delta
BenchmarkVariableString-20     72            72            +0.00%
BenchmarkListString-20         1473          1473          +0.00%

Fixes #53261

Change-Id: I4133bed2f8df16d398b707fb9509230325765c57
Reviewed-on: https://go-review.googlesource.com/c/go/+/421883
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
Rob Pike 2022-06-16 17:35:05 +10:00
parent 3a067b288e
commit 36760ca9fd
4 changed files with 134 additions and 135 deletions

View file

@ -111,20 +111,26 @@ type stateFn func(*lexer) stateFn
// lexer holds the state of the scanner. // lexer holds the state of the scanner.
type lexer struct { type lexer struct {
name string // the name of the input; used only for error reports name string // the name of the input; used only for error reports
input string // the string being scanned input string // the string being scanned
leftDelim string // start of action leftDelim string // start of action marker
rightDelim string // end of action rightDelim string // end of action marker
emitComment bool // emit itemComment tokens. pos Pos // current position in the input
pos Pos // current position in the input start Pos // start position of this item
start Pos // start position of this item atEOF bool // we have hit the end of input and returned eof
atEOF bool // we have hit the end of input and returned eof parenDepth int // nesting depth of ( ) exprs
items chan item // channel of scanned items line int // 1+number of newlines seen
parenDepth int // nesting depth of ( ) exprs startLine int // start line of this item
line int // 1+number of newlines seen item item // item to return to parser
startLine int // start line of this item insideAction bool // are we inside an action?
breakOK bool // break keyword allowed options lexOptions
continueOK bool // continue keyword allowed }
// lexOptions control behavior of the lexer. All default to false.
type lexOptions struct {
emitComment bool // emit itemComment tokens.
breakOK bool // break keyword allowed
continueOK bool // continue keyword allowed
} }
// next returns the next rune in the input. // next returns the next rune in the input.
@ -160,14 +166,29 @@ func (l *lexer) backup() {
} }
} }
// emit passes an item back to the client. // thisItem returns the item at the current input point with the specified type
func (l *lexer) emit(t itemType) { // and advances the input.
l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine} func (l *lexer) thisItem(t itemType) item {
i := item{t, l.start, l.input[l.start:l.pos], l.startLine}
l.start = l.pos l.start = l.pos
l.startLine = l.line l.startLine = l.line
return i
}
// emit passes the trailing text as an item back to the parser.
func (l *lexer) emit(t itemType) stateFn {
return l.emitItem(l.thisItem(t))
}
// emitItem passes the specified item to the parser.
func (l *lexer) emitItem(i item) stateFn {
l.item = i
return nil
} }
// ignore skips over the pending input before this point. // ignore skips over the pending input before this point.
// It tracks newlines in the ignored text, so use it only
// for text that is skipped without calling l.next.
func (l *lexer) ignore() { func (l *lexer) ignore() {
l.line += strings.Count(l.input[l.start:l.pos], "\n") l.line += strings.Count(l.input[l.start:l.pos], "\n")
l.start = l.pos l.start = l.pos
@ -193,25 +214,31 @@ func (l *lexer) acceptRun(valid string) {
// errorf returns an error token and terminates the scan by passing // errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem. // back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...any) stateFn { func (l *lexer) errorf(format string, args ...any) stateFn {
l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine} l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
l.start = 0
l.pos = 0
l.input = l.input[:0]
return nil return nil
} }
// nextItem returns the next item from the input. // nextItem returns the next item from the input.
// Called by the parser, not in the lexing goroutine. // Called by the parser, not in the lexing goroutine.
func (l *lexer) nextItem() item { func (l *lexer) nextItem() item {
return <-l.items l.item = item{itemEOF, l.pos, "EOF", l.startLine}
} state := lexText
if l.insideAction {
// drain drains the output so the lexing goroutine will exit. state = lexInsideAction
// Called by the parser, not in the lexing goroutine. }
func (l *lexer) drain() { for {
for range l.items { state = state(l)
if state == nil {
return l.item
}
} }
} }
// lex creates a new scanner for the input string. // lex creates a new scanner for the input string.
func lex(name, input, left, right string, emitComment, breakOK, continueOK bool) *lexer { func lex(name, input, left, right string) *lexer {
if left == "" { if left == "" {
left = leftDelim left = leftDelim
} }
@ -219,29 +246,17 @@ func lex(name, input, left, right string, emitComment, breakOK, continueOK bool)
right = rightDelim right = rightDelim
} }
l := &lexer{ l := &lexer{
name: name, name: name,
input: input, input: input,
leftDelim: left, leftDelim: left,
rightDelim: right, rightDelim: right,
emitComment: emitComment, line: 1,
breakOK: breakOK, startLine: 1,
continueOK: continueOK, insideAction: false,
items: make(chan item),
line: 1,
startLine: 1,
} }
go l.run()
return l return l
} }
// run runs the state machine for the lexer.
func (l *lexer) run() {
for state := lexText; state != nil; {
state = state(l)
}
close(l.items)
}
// state functions // state functions
const ( const (
@ -254,29 +269,32 @@ const (
// lexText scans until an opening action delimiter, "{{". // lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn { func lexText(l *lexer) stateFn {
if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 { if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
ldn := Pos(len(l.leftDelim)) if x > 0 {
l.pos += Pos(x) l.pos += Pos(x)
trimLength := Pos(0) // Do we trim any trailing space?
if hasLeftTrimMarker(l.input[l.pos+ldn:]) { trimLength := Pos(0)
trimLength = rightTrimLength(l.input[l.start:l.pos]) delimEnd := l.pos + Pos(len(l.leftDelim))
} if hasLeftTrimMarker(l.input[delimEnd:]) {
l.pos -= trimLength trimLength = rightTrimLength(l.input[l.start:l.pos])
if l.pos > l.start { }
l.pos -= trimLength
l.line += strings.Count(l.input[l.start:l.pos], "\n") l.line += strings.Count(l.input[l.start:l.pos], "\n")
l.emit(itemText) i := l.thisItem(itemText)
l.pos += trimLength
l.ignore()
if len(i.val) > 0 {
return l.emitItem(i)
}
} }
l.pos += trimLength
l.ignore()
return lexLeftDelim return lexLeftDelim
} }
l.pos = Pos(len(l.input)) l.pos = Pos(len(l.input))
// Correctly reached EOF. // Correctly reached EOF.
if l.pos > l.start { if l.pos > l.start {
l.line += strings.Count(l.input[l.start:l.pos], "\n") l.line += strings.Count(l.input[l.start:l.pos], "\n")
l.emit(itemText) return l.emit(itemText)
} }
l.emit(itemEOF) return l.emit(itemEOF)
return nil
} }
// rightTrimLength returns the length of the spaces at the end of the string. // rightTrimLength returns the length of the spaces at the end of the string.
@ -301,6 +319,7 @@ func leftTrimLength(s string) Pos {
} }
// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker. // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
// (The text to be trimmed has already been emitted.)
func lexLeftDelim(l *lexer) stateFn { func lexLeftDelim(l *lexer) stateFn {
l.pos += Pos(len(l.leftDelim)) l.pos += Pos(len(l.leftDelim))
trimSpace := hasLeftTrimMarker(l.input[l.pos:]) trimSpace := hasLeftTrimMarker(l.input[l.pos:])
@ -313,28 +332,27 @@ func lexLeftDelim(l *lexer) stateFn {
l.ignore() l.ignore()
return lexComment return lexComment
} }
l.emit(itemLeftDelim) i := l.thisItem(itemLeftDelim)
l.insideAction = true
l.pos += afterMarker l.pos += afterMarker
l.ignore() l.ignore()
l.parenDepth = 0 l.parenDepth = 0
return lexInsideAction return l.emitItem(i)
} }
// lexComment scans a comment. The left comment marker is known to be present. // lexComment scans a comment. The left comment marker is known to be present.
func lexComment(l *lexer) stateFn { func lexComment(l *lexer) stateFn {
l.pos += Pos(len(leftComment)) l.pos += Pos(len(leftComment))
i := strings.Index(l.input[l.pos:], rightComment) x := strings.Index(l.input[l.pos:], rightComment)
if i < 0 { if x < 0 {
return l.errorf("unclosed comment") return l.errorf("unclosed comment")
} }
l.pos += Pos(i + len(rightComment)) l.pos += Pos(x + len(rightComment))
delim, trimSpace := l.atRightDelim() delim, trimSpace := l.atRightDelim()
if !delim { if !delim {
return l.errorf("comment ends before closing delimiter") return l.errorf("comment ends before closing delimiter")
} }
if l.emitComment { i := l.thisItem(itemComment)
l.emit(itemComment)
}
if trimSpace { if trimSpace {
l.pos += trimMarkerLen l.pos += trimMarkerLen
} }
@ -343,6 +361,9 @@ func lexComment(l *lexer) stateFn {
l.pos += leftTrimLength(l.input[l.pos:]) l.pos += leftTrimLength(l.input[l.pos:])
} }
l.ignore() l.ignore()
if l.options.emitComment {
return l.emitItem(i)
}
return lexText return lexText
} }
@ -354,12 +375,13 @@ func lexRightDelim(l *lexer) stateFn {
l.ignore() l.ignore()
} }
l.pos += Pos(len(l.rightDelim)) l.pos += Pos(len(l.rightDelim))
l.emit(itemRightDelim) i := l.thisItem(itemRightDelim)
if trimSpace { if trimSpace {
l.pos += leftTrimLength(l.input[l.pos:]) l.pos += leftTrimLength(l.input[l.pos:])
l.ignore() l.ignore()
} }
return lexText l.insideAction = false
return l.emitItem(i)
} }
// lexInsideAction scans the elements inside action delimiters. // lexInsideAction scans the elements inside action delimiters.
@ -381,14 +403,14 @@ func lexInsideAction(l *lexer) stateFn {
l.backup() // Put space back in case we have " -}}". l.backup() // Put space back in case we have " -}}".
return lexSpace return lexSpace
case r == '=': case r == '=':
l.emit(itemAssign) return l.emit(itemAssign)
case r == ':': case r == ':':
if l.next() != '=' { if l.next() != '=' {
return l.errorf("expected :=") return l.errorf("expected :=")
} }
l.emit(itemDeclare) return l.emit(itemDeclare)
case r == '|': case r == '|':
l.emit(itemPipe) return l.emit(itemPipe)
case r == '"': case r == '"':
return lexQuote return lexQuote
case r == '`': case r == '`':
@ -413,20 +435,19 @@ func lexInsideAction(l *lexer) stateFn {
l.backup() l.backup()
return lexIdentifier return lexIdentifier
case r == '(': case r == '(':
l.emit(itemLeftParen)
l.parenDepth++ l.parenDepth++
return l.emit(itemLeftParen)
case r == ')': case r == ')':
l.emit(itemRightParen)
l.parenDepth-- l.parenDepth--
if l.parenDepth < 0 { if l.parenDepth < 0 {
return l.errorf("unexpected right paren %#U", r) return l.errorf("unexpected right paren")
} }
return l.emit(itemRightParen)
case r <= unicode.MaxASCII && unicode.IsPrint(r): case r <= unicode.MaxASCII && unicode.IsPrint(r):
l.emit(itemChar) return l.emit(itemChar)
default: default:
return l.errorf("unrecognized character in action: %#U", r) return l.errorf("unrecognized character in action: %#U", r)
} }
return lexInsideAction
} }
// lexSpace scans a run of space characters. // lexSpace scans a run of space characters.
@ -451,13 +472,11 @@ func lexSpace(l *lexer) stateFn {
return lexRightDelim // On the delim, so go right to that. return lexRightDelim // On the delim, so go right to that.
} }
} }
l.emit(itemSpace) return l.emit(itemSpace)
return lexInsideAction
} }
// lexIdentifier scans an alphanumeric. // lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn { func lexIdentifier(l *lexer) stateFn {
Loop:
for { for {
switch r := l.next(); { switch r := l.next(); {
case isAlphaNumeric(r): case isAlphaNumeric(r):
@ -471,22 +490,19 @@ Loop:
switch { switch {
case key[word] > itemKeyword: case key[word] > itemKeyword:
item := key[word] item := key[word]
if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK { if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK {
l.emit(itemIdentifier) return l.emit(itemIdentifier)
} else {
l.emit(item)
} }
return l.emit(item)
case word[0] == '.': case word[0] == '.':
l.emit(itemField) return l.emit(itemField)
case word == "true", word == "false": case word == "true", word == "false":
l.emit(itemBool) return l.emit(itemBool)
default: default:
l.emit(itemIdentifier) return l.emit(itemIdentifier)
} }
break Loop
} }
} }
return lexInsideAction
} }
// lexField scans a field: .Alphanumeric. // lexField scans a field: .Alphanumeric.
@ -499,8 +515,7 @@ func lexField(l *lexer) stateFn {
// The $ has been scanned. // The $ has been scanned.
func lexVariable(l *lexer) stateFn { func lexVariable(l *lexer) stateFn {
if l.atTerminator() { // Nothing interesting follows -> "$". if l.atTerminator() { // Nothing interesting follows -> "$".
l.emit(itemVariable) return l.emit(itemVariable)
return lexInsideAction
} }
return lexFieldOrVariable(l, itemVariable) return lexFieldOrVariable(l, itemVariable)
} }
@ -510,11 +525,9 @@ func lexVariable(l *lexer) stateFn {
func lexFieldOrVariable(l *lexer, typ itemType) stateFn { func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
if l.atTerminator() { // Nothing interesting follows -> "." or "$". if l.atTerminator() { // Nothing interesting follows -> "." or "$".
if typ == itemVariable { if typ == itemVariable {
l.emit(itemVariable) return l.emit(itemVariable)
} else {
l.emit(itemDot)
} }
return lexInsideAction return l.emit(itemDot)
} }
var r rune var r rune
for { for {
@ -527,8 +540,7 @@ func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
if !l.atTerminator() { if !l.atTerminator() {
return l.errorf("bad character %#U", r) return l.errorf("bad character %#U", r)
} }
l.emit(typ) return l.emit(typ)
return lexInsideAction
} }
// atTerminator reports whether the input is at valid termination character to // atTerminator reports whether the input is at valid termination character to
@ -564,8 +576,7 @@ Loop:
break Loop break Loop
} }
} }
l.emit(itemCharConstant) return l.emit(itemCharConstant)
return lexInsideAction
} }
// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
@ -581,11 +592,9 @@ func lexNumber(l *lexer) stateFn {
if !l.scanNumber() || l.input[l.pos-1] != 'i' { if !l.scanNumber() || l.input[l.pos-1] != 'i' {
return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
} }
l.emit(itemComplex) return l.emit(itemComplex)
} else {
l.emit(itemNumber)
} }
return lexInsideAction return l.emit(itemNumber)
} }
func (l *lexer) scanNumber() bool { func (l *lexer) scanNumber() bool {
@ -641,8 +650,7 @@ Loop:
break Loop break Loop
} }
} }
l.emit(itemString) return l.emit(itemString)
return lexInsideAction
} }
// lexRawQuote scans a raw quoted string. // lexRawQuote scans a raw quoted string.
@ -656,8 +664,7 @@ Loop:
break Loop break Loop
} }
} }
l.emit(itemRawString) return l.emit(itemRawString)
return lexInsideAction
} }
// isSpace reports whether r is a space character. // isSpace reports whether r is a space character.

View file

@ -359,8 +359,7 @@ var lexTests = []lexTest{
{"extra right paren", "{{3)}}", []item{ {"extra right paren", "{{3)}}", []item{
tLeft, tLeft,
mkItem(itemNumber, "3"), mkItem(itemNumber, "3"),
tRpar, mkItem(itemError, "unexpected right paren"),
mkItem(itemError, `unexpected right paren U+0029 ')'`),
}}, }},
// Fixed bugs // Fixed bugs
@ -394,7 +393,12 @@ var lexTests = []lexTest{
// collect gathers the emitted items into a slice. // collect gathers the emitted items into a slice.
func collect(t *lexTest, left, right string) (items []item) { func collect(t *lexTest, left, right string) (items []item) {
l := lex(t.name, t.input, left, right, true, true, true) l := lex(t.name, t.input, left, right)
l.options = lexOptions{
emitComment: true,
breakOK: true,
continueOK: true,
}
for { for {
item := l.nextItem() item := l.nextItem()
items = append(items, item) items = append(items, item)
@ -431,7 +435,9 @@ func TestLex(t *testing.T) {
items := collect(&test, "", "") items := collect(&test, "", "")
if !equal(items, test.items, false) { if !equal(items, test.items, false) {
t.Errorf("%s: got\n\t%+v\nexpected\n\t%v", test.name, items, test.items) t.Errorf("%s: got\n\t%+v\nexpected\n\t%v", test.name, items, test.items)
return // TODO
} }
t.Log(test.name, "OK")
} }
} }
@ -546,22 +552,6 @@ func TestPos(t *testing.T) {
} }
} }
// Test that an error shuts down the lexing goroutine.
func TestShutdown(t *testing.T) {
// We need to duplicate template.Parse here to hold on to the lexer.
const text = "erroneous{{define}}{{else}}1234"
lexer := lex("foo", text, "{{", "}}", false, true, true)
_, err := New("root").parseLexer(lexer)
if err == nil {
t.Fatalf("expected error")
}
// The error should have drained the input. Therefore, the lexer should be shut down.
token, ok := <-lexer.items
if ok {
t.Fatalf("input was not drained; got %v", token)
}
}
// parseLexer is a local version of parse that lets us pass in the lexer instead of building it. // parseLexer is a local version of parse that lets us pass in the lexer instead of building it.
// We expect an error, so the tree set and funcs list are explicitly nil. // We expect an error, so the tree set and funcs list are explicitly nil.
func (t *Tree) parseLexer(lex *lexer) (tree *Tree, err error) { func (t *Tree) parseLexer(lex *lexer) (tree *Tree, err error) {

View file

@ -210,7 +210,6 @@ func (t *Tree) recover(errp *error) {
panic(e) panic(e)
} }
if t != nil { if t != nil {
t.lex.drain()
t.stopParse() t.stopParse()
} }
*errp = e.(error) *errp = e.(error)
@ -241,10 +240,12 @@ func (t *Tree) stopParse() {
func (t *Tree) Parse(text, leftDelim, rightDelim string, treeSet map[string]*Tree, funcs ...map[string]any) (tree *Tree, err error) { func (t *Tree) Parse(text, leftDelim, rightDelim string, treeSet map[string]*Tree, funcs ...map[string]any) (tree *Tree, err error) {
defer t.recover(&err) defer t.recover(&err)
t.ParseName = t.Name t.ParseName = t.Name
emitComment := t.Mode&ParseComments != 0 lexer := lex(t.Name, text, leftDelim, rightDelim)
breakOK := !t.hasFunction("break") lexer.options = lexOptions{
continueOK := !t.hasFunction("continue") emitComment: t.Mode&ParseComments != 0,
lexer := lex(t.Name, text, leftDelim, rightDelim, emitComment, breakOK, continueOK) breakOK: !t.hasFunction("break"),
continueOK: !t.hasFunction("continue"),
}
t.startParse(funcs, lexer, treeSet) t.startParse(funcs, lexer, treeSet)
t.text = text t.text = text
t.parse() t.parse()

View file

@ -489,7 +489,7 @@ var errorTests = []parseTest{
hasError, `unclosed left paren`}, hasError, `unclosed left paren`},
{"rparen", {"rparen",
"{{.X 1 2 3 ) }}", "{{.X 1 2 3 ) }}",
hasError, `unexpected ")" in command`}, hasError, "unexpected right paren"},
{"rparen2", {"rparen2",
"{{(.X 1 2 3", "{{(.X 1 2 3",
hasError, `unclosed action`}, hasError, `unclosed action`},
@ -597,7 +597,8 @@ func TestBlock(t *testing.T) {
} }
func TestLineNum(t *testing.T) { func TestLineNum(t *testing.T) {
const count = 100 // const count = 100
const count = 3
text := strings.Repeat("{{printf 1234}}\n", count) text := strings.Repeat("{{printf 1234}}\n", count)
tree, err := New("bench").Parse(text, "", "", make(map[string]*Tree), builtins) tree, err := New("bench").Parse(text, "", "", make(map[string]*Tree), builtins)
if err != nil { if err != nil {
@ -611,11 +612,11 @@ func TestLineNum(t *testing.T) {
// Action first. // Action first.
action := nodes[i].(*ActionNode) action := nodes[i].(*ActionNode)
if action.Line != line { if action.Line != line {
t.Fatalf("line %d: action is line %d", line, action.Line) t.Errorf("line %d: action is line %d", line, action.Line)
} }
pipe := action.Pipe pipe := action.Pipe
if pipe.Line != line { if pipe.Line != line {
t.Fatalf("line %d: pipe is line %d", line, pipe.Line) t.Errorf("line %d: pipe is line %d", line, pipe.Line)
} }
} }
} }