From 36760ca9fd3eaf2c79efef48e533b66da6c542bd Mon Sep 17 00:00:00 2001
From: Rob Pike <r@golang.org>
Date: Thu, 16 Jun 2022 17:35:05 +1000
Subject: [PATCH] text/template/parse: simplify I/O in lexing

The concurrent model for delivering tokens was fine for pedagogy,
but has caused a few problems as the package has evolved (that is,
got more complicated). It's easy to eliminate it, simplifying or
removing some of the hacks used to work around these prolems.

The old lexer would deliver tokens over a channel to the parsing
goroutine, and continue running until EOF. In this rewrite, we
instead run the machine until a token is ready, and shut it down
until the next token is needed. The mechanism is just to return nil
as the state function, which requires a bit more threading of return
values through the state functions but is not difficult. The change
is modest.

A couple of error messages change, but otherwise the change has no
external effect. This is just an internal cleanup, long overdue.

benchmark                      old ns/op     new ns/op     delta
BenchmarkParseLarge-20         12222729      6769966       -44.61%
BenchmarkVariableString-20     73.5          73.4          -0.16%
BenchmarkListString-20         1827          1841          +0.77%

benchmark                      old allocs     new allocs     delta
BenchmarkVariableString-20     3              3              +0.00%
BenchmarkListString-20         31             31             +0.00%

benchmark                      old bytes     new bytes     delta
BenchmarkVariableString-20     72            72            +0.00%
BenchmarkListString-20         1473          1473          +0.00%

Fixes #53261

Change-Id: I4133bed2f8df16d398b707fb9509230325765c57
Reviewed-on: https://go-review.googlesource.com/c/go/+/421883
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
---
 src/text/template/parse/lex.go        | 221 +++++++++++++-------------
 src/text/template/parse/lex_test.go   |  28 ++--
 src/text/template/parse/parse.go      |  11 +-
 src/text/template/parse/parse_test.go |   9 +-
 4 files changed, 134 insertions(+), 135 deletions(-)

diff --git a/src/text/template/parse/lex.go b/src/text/template/parse/lex.go
index 29403dd947..3562e0abc9 100644
--- a/src/text/template/parse/lex.go
+++ b/src/text/template/parse/lex.go
@@ -111,20 +111,26 @@ type stateFn func(*lexer) stateFn
 
 // lexer holds the state of the scanner.
 type lexer struct {
-	name        string    // the name of the input; used only for error reports
-	input       string    // the string being scanned
-	leftDelim   string    // start of action
-	rightDelim  string    // end of action
-	emitComment bool      // emit itemComment tokens.
-	pos         Pos       // current position in the input
-	start       Pos       // start position of this item
-	atEOF       bool      // we have hit the end of input and returned eof
-	items       chan item // channel of scanned items
-	parenDepth  int       // nesting depth of ( ) exprs
-	line        int       // 1+number of newlines seen
-	startLine   int       // start line of this item
-	breakOK     bool      // break keyword allowed
-	continueOK  bool      // continue keyword allowed
+	name         string // the name of the input; used only for error reports
+	input        string // the string being scanned
+	leftDelim    string // start of action marker
+	rightDelim   string // end of action marker
+	pos          Pos    // current position in the input
+	start        Pos    // start position of this item
+	atEOF        bool   // we have hit the end of input and returned eof
+	parenDepth   int    // nesting depth of ( ) exprs
+	line         int    // 1+number of newlines seen
+	startLine    int    // start line of this item
+	item         item   // item to return to parser
+	insideAction bool   // are we inside an action?
+	options      lexOptions
+}
+
+// lexOptions control behavior of the lexer. All default to false.
+type lexOptions struct {
+	emitComment bool // emit itemComment tokens.
+	breakOK     bool // break keyword allowed
+	continueOK  bool // continue keyword allowed
 }
 
 // next returns the next rune in the input.
@@ -160,14 +166,29 @@ func (l *lexer) backup() {
 	}
 }
 
-// emit passes an item back to the client.
-func (l *lexer) emit(t itemType) {
-	l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
+// thisItem returns the item at the current input point with the specified type
+// and advances the input.
+func (l *lexer) thisItem(t itemType) item {
+	i := item{t, l.start, l.input[l.start:l.pos], l.startLine}
 	l.start = l.pos
 	l.startLine = l.line
+	return i
+}
+
+// emit passes the trailing text as an item back to the parser.
+func (l *lexer) emit(t itemType) stateFn {
+	return l.emitItem(l.thisItem(t))
+}
+
+// emitItem passes the specified item to the parser.
+func (l *lexer) emitItem(i item) stateFn {
+	l.item = i
+	return nil
 }
 
 // ignore skips over the pending input before this point.
+// It tracks newlines in the ignored text, so use it only
+// for text that is skipped without calling l.next.
 func (l *lexer) ignore() {
 	l.line += strings.Count(l.input[l.start:l.pos], "\n")
 	l.start = l.pos
@@ -193,25 +214,31 @@ func (l *lexer) acceptRun(valid string) {
 // errorf returns an error token and terminates the scan by passing
 // back a nil pointer that will be the next state, terminating l.nextItem.
 func (l *lexer) errorf(format string, args ...any) stateFn {
-	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
+	l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
+	l.start = 0
+	l.pos = 0
+	l.input = l.input[:0]
 	return nil
 }
 
 // nextItem returns the next item from the input.
 // Called by the parser, not in the lexing goroutine.
 func (l *lexer) nextItem() item {
-	return <-l.items
-}
-
-// drain drains the output so the lexing goroutine will exit.
-// Called by the parser, not in the lexing goroutine.
-func (l *lexer) drain() {
-	for range l.items {
+	l.item = item{itemEOF, l.pos, "EOF", l.startLine}
+	state := lexText
+	if l.insideAction {
+		state = lexInsideAction
+	}
+	for {
+		state = state(l)
+		if state == nil {
+			return l.item
+		}
 	}
 }
 
 // lex creates a new scanner for the input string.
-func lex(name, input, left, right string, emitComment, breakOK, continueOK bool) *lexer {
+func lex(name, input, left, right string) *lexer {
 	if left == "" {
 		left = leftDelim
 	}
@@ -219,29 +246,17 @@ func lex(name, input, left, right string, emitComment, breakOK, continueOK bool)
 		right = rightDelim
 	}
 	l := &lexer{
-		name:        name,
-		input:       input,
-		leftDelim:   left,
-		rightDelim:  right,
-		emitComment: emitComment,
-		breakOK:     breakOK,
-		continueOK:  continueOK,
-		items:       make(chan item),
-		line:        1,
-		startLine:   1,
+		name:         name,
+		input:        input,
+		leftDelim:    left,
+		rightDelim:   right,
+		line:         1,
+		startLine:    1,
+		insideAction: false,
 	}
-	go l.run()
 	return l
 }
 
-// run runs the state machine for the lexer.
-func (l *lexer) run() {
-	for state := lexText; state != nil; {
-		state = state(l)
-	}
-	close(l.items)
-}
-
 // state functions
 
 const (
@@ -254,29 +269,32 @@ const (
 // lexText scans until an opening action delimiter, "{{".
 func lexText(l *lexer) stateFn {
 	if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
-		ldn := Pos(len(l.leftDelim))
-		l.pos += Pos(x)
-		trimLength := Pos(0)
-		if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
-			trimLength = rightTrimLength(l.input[l.start:l.pos])
-		}
-		l.pos -= trimLength
-		if l.pos > l.start {
+		if x > 0 {
+			l.pos += Pos(x)
+			// Do we trim any trailing space?
+			trimLength := Pos(0)
+			delimEnd := l.pos + Pos(len(l.leftDelim))
+			if hasLeftTrimMarker(l.input[delimEnd:]) {
+				trimLength = rightTrimLength(l.input[l.start:l.pos])
+			}
+			l.pos -= trimLength
 			l.line += strings.Count(l.input[l.start:l.pos], "\n")
-			l.emit(itemText)
+			i := l.thisItem(itemText)
+			l.pos += trimLength
+			l.ignore()
+			if len(i.val) > 0 {
+				return l.emitItem(i)
+			}
 		}
-		l.pos += trimLength
-		l.ignore()
 		return lexLeftDelim
 	}
 	l.pos = Pos(len(l.input))
 	// Correctly reached EOF.
 	if l.pos > l.start {
 		l.line += strings.Count(l.input[l.start:l.pos], "\n")
-		l.emit(itemText)
+		return l.emit(itemText)
 	}
-	l.emit(itemEOF)
-	return nil
+	return l.emit(itemEOF)
 }
 
 // rightTrimLength returns the length of the spaces at the end of the string.
@@ -301,6 +319,7 @@ func leftTrimLength(s string) Pos {
 }
 
 // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
+// (The text to be trimmed has already been emitted.)
 func lexLeftDelim(l *lexer) stateFn {
 	l.pos += Pos(len(l.leftDelim))
 	trimSpace := hasLeftTrimMarker(l.input[l.pos:])
@@ -313,28 +332,27 @@ func lexLeftDelim(l *lexer) stateFn {
 		l.ignore()
 		return lexComment
 	}
-	l.emit(itemLeftDelim)
+	i := l.thisItem(itemLeftDelim)
+	l.insideAction = true
 	l.pos += afterMarker
 	l.ignore()
 	l.parenDepth = 0
-	return lexInsideAction
+	return l.emitItem(i)
 }
 
 // lexComment scans a comment. The left comment marker is known to be present.
 func lexComment(l *lexer) stateFn {
 	l.pos += Pos(len(leftComment))
-	i := strings.Index(l.input[l.pos:], rightComment)
-	if i < 0 {
+	x := strings.Index(l.input[l.pos:], rightComment)
+	if x < 0 {
 		return l.errorf("unclosed comment")
 	}
-	l.pos += Pos(i + len(rightComment))
+	l.pos += Pos(x + len(rightComment))
 	delim, trimSpace := l.atRightDelim()
 	if !delim {
 		return l.errorf("comment ends before closing delimiter")
 	}
-	if l.emitComment {
-		l.emit(itemComment)
-	}
+	i := l.thisItem(itemComment)
 	if trimSpace {
 		l.pos += trimMarkerLen
 	}
@@ -343,6 +361,9 @@ func lexComment(l *lexer) stateFn {
 		l.pos += leftTrimLength(l.input[l.pos:])
 	}
 	l.ignore()
+	if l.options.emitComment {
+		return l.emitItem(i)
+	}
 	return lexText
 }
 
@@ -354,12 +375,13 @@ func lexRightDelim(l *lexer) stateFn {
 		l.ignore()
 	}
 	l.pos += Pos(len(l.rightDelim))
-	l.emit(itemRightDelim)
+	i := l.thisItem(itemRightDelim)
 	if trimSpace {
 		l.pos += leftTrimLength(l.input[l.pos:])
 		l.ignore()
 	}
-	return lexText
+	l.insideAction = false
+	return l.emitItem(i)
 }
 
 // lexInsideAction scans the elements inside action delimiters.
@@ -381,14 +403,14 @@ func lexInsideAction(l *lexer) stateFn {
 		l.backup() // Put space back in case we have " -}}".
 		return lexSpace
 	case r == '=':
-		l.emit(itemAssign)
+		return l.emit(itemAssign)
 	case r == ':':
 		if l.next() != '=' {
 			return l.errorf("expected :=")
 		}
-		l.emit(itemDeclare)
+		return l.emit(itemDeclare)
 	case r == '|':
-		l.emit(itemPipe)
+		return l.emit(itemPipe)
 	case r == '"':
 		return lexQuote
 	case r == '`':
@@ -413,20 +435,19 @@ func lexInsideAction(l *lexer) stateFn {
 		l.backup()
 		return lexIdentifier
 	case r == '(':
-		l.emit(itemLeftParen)
 		l.parenDepth++
+		return l.emit(itemLeftParen)
 	case r == ')':
-		l.emit(itemRightParen)
 		l.parenDepth--
 		if l.parenDepth < 0 {
-			return l.errorf("unexpected right paren %#U", r)
+			return l.errorf("unexpected right paren")
 		}
+		return l.emit(itemRightParen)
 	case r <= unicode.MaxASCII && unicode.IsPrint(r):
-		l.emit(itemChar)
+		return l.emit(itemChar)
 	default:
 		return l.errorf("unrecognized character in action: %#U", r)
 	}
-	return lexInsideAction
 }
 
 // lexSpace scans a run of space characters.
@@ -451,13 +472,11 @@ func lexSpace(l *lexer) stateFn {
 			return lexRightDelim // On the delim, so go right to that.
 		}
 	}
-	l.emit(itemSpace)
-	return lexInsideAction
+	return l.emit(itemSpace)
 }
 
 // lexIdentifier scans an alphanumeric.
 func lexIdentifier(l *lexer) stateFn {
-Loop:
 	for {
 		switch r := l.next(); {
 		case isAlphaNumeric(r):
@@ -471,22 +490,19 @@ Loop:
 			switch {
 			case key[word] > itemKeyword:
 				item := key[word]
-				if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK {
-					l.emit(itemIdentifier)
-				} else {
-					l.emit(item)
+				if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK {
+					return l.emit(itemIdentifier)
 				}
+				return l.emit(item)
 			case word[0] == '.':
-				l.emit(itemField)
+				return l.emit(itemField)
 			case word == "true", word == "false":
-				l.emit(itemBool)
+				return l.emit(itemBool)
 			default:
-				l.emit(itemIdentifier)
+				return l.emit(itemIdentifier)
 			}
-			break Loop
 		}
 	}
-	return lexInsideAction
 }
 
 // lexField scans a field: .Alphanumeric.
@@ -499,8 +515,7 @@ func lexField(l *lexer) stateFn {
 // The $ has been scanned.
 func lexVariable(l *lexer) stateFn {
 	if l.atTerminator() { // Nothing interesting follows -> "$".
-		l.emit(itemVariable)
-		return lexInsideAction
+		return l.emit(itemVariable)
 	}
 	return lexFieldOrVariable(l, itemVariable)
 }
@@ -510,11 +525,9 @@ func lexVariable(l *lexer) stateFn {
 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
 	if l.atTerminator() { // Nothing interesting follows -> "." or "$".
 		if typ == itemVariable {
-			l.emit(itemVariable)
-		} else {
-			l.emit(itemDot)
+			return l.emit(itemVariable)
 		}
-		return lexInsideAction
+		return l.emit(itemDot)
 	}
 	var r rune
 	for {
@@ -527,8 +540,7 @@ func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
 	if !l.atTerminator() {
 		return l.errorf("bad character %#U", r)
 	}
-	l.emit(typ)
-	return lexInsideAction
+	return l.emit(typ)
 }
 
 // atTerminator reports whether the input is at valid termination character to
@@ -564,8 +576,7 @@ Loop:
 			break Loop
 		}
 	}
-	l.emit(itemCharConstant)
-	return lexInsideAction
+	return l.emit(itemCharConstant)
 }
 
 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
@@ -581,11 +592,9 @@ func lexNumber(l *lexer) stateFn {
 		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
 			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
 		}
-		l.emit(itemComplex)
-	} else {
-		l.emit(itemNumber)
+		return l.emit(itemComplex)
 	}
-	return lexInsideAction
+	return l.emit(itemNumber)
 }
 
 func (l *lexer) scanNumber() bool {
@@ -641,8 +650,7 @@ Loop:
 			break Loop
 		}
 	}
-	l.emit(itemString)
-	return lexInsideAction
+	return l.emit(itemString)
 }
 
 // lexRawQuote scans a raw quoted string.
@@ -656,8 +664,7 @@ Loop:
 			break Loop
 		}
 	}
-	l.emit(itemRawString)
-	return lexInsideAction
+	return l.emit(itemRawString)
 }
 
 // isSpace reports whether r is a space character.
diff --git a/src/text/template/parse/lex_test.go b/src/text/template/parse/lex_test.go
index c5f429667c..947889a80b 100644
--- a/src/text/template/parse/lex_test.go
+++ b/src/text/template/parse/lex_test.go
@@ -359,8 +359,7 @@ var lexTests = []lexTest{
 	{"extra right paren", "{{3)}}", []item{
 		tLeft,
 		mkItem(itemNumber, "3"),
-		tRpar,
-		mkItem(itemError, `unexpected right paren U+0029 ')'`),
+		mkItem(itemError, "unexpected right paren"),
 	}},
 
 	// Fixed bugs
@@ -394,7 +393,12 @@ var lexTests = []lexTest{
 
 // collect gathers the emitted items into a slice.
 func collect(t *lexTest, left, right string) (items []item) {
-	l := lex(t.name, t.input, left, right, true, true, true)
+	l := lex(t.name, t.input, left, right)
+	l.options = lexOptions{
+		emitComment: true,
+		breakOK:     true,
+		continueOK:  true,
+	}
 	for {
 		item := l.nextItem()
 		items = append(items, item)
@@ -431,7 +435,9 @@ func TestLex(t *testing.T) {
 		items := collect(&test, "", "")
 		if !equal(items, test.items, false) {
 			t.Errorf("%s: got\n\t%+v\nexpected\n\t%v", test.name, items, test.items)
+			return // TODO
 		}
+		t.Log(test.name, "OK")
 	}
 }
 
@@ -546,22 +552,6 @@ func TestPos(t *testing.T) {
 	}
 }
 
-// Test that an error shuts down the lexing goroutine.
-func TestShutdown(t *testing.T) {
-	// We need to duplicate template.Parse here to hold on to the lexer.
-	const text = "erroneous{{define}}{{else}}1234"
-	lexer := lex("foo", text, "{{", "}}", false, true, true)
-	_, err := New("root").parseLexer(lexer)
-	if err == nil {
-		t.Fatalf("expected error")
-	}
-	// The error should have drained the input. Therefore, the lexer should be shut down.
-	token, ok := <-lexer.items
-	if ok {
-		t.Fatalf("input was not drained; got %v", token)
-	}
-}
-
 // parseLexer is a local version of parse that lets us pass in the lexer instead of building it.
 // We expect an error, so the tree set and funcs list are explicitly nil.
 func (t *Tree) parseLexer(lex *lexer) (tree *Tree, err error) {
diff --git a/src/text/template/parse/parse.go b/src/text/template/parse/parse.go
index 00c258ad5d..87b7618f75 100644
--- a/src/text/template/parse/parse.go
+++ b/src/text/template/parse/parse.go
@@ -210,7 +210,6 @@ func (t *Tree) recover(errp *error) {
 			panic(e)
 		}
 		if t != nil {
-			t.lex.drain()
 			t.stopParse()
 		}
 		*errp = e.(error)
@@ -241,10 +240,12 @@ func (t *Tree) stopParse() {
 func (t *Tree) Parse(text, leftDelim, rightDelim string, treeSet map[string]*Tree, funcs ...map[string]any) (tree *Tree, err error) {
 	defer t.recover(&err)
 	t.ParseName = t.Name
-	emitComment := t.Mode&ParseComments != 0
-	breakOK := !t.hasFunction("break")
-	continueOK := !t.hasFunction("continue")
-	lexer := lex(t.Name, text, leftDelim, rightDelim, emitComment, breakOK, continueOK)
+	lexer := lex(t.Name, text, leftDelim, rightDelim)
+	lexer.options = lexOptions{
+		emitComment: t.Mode&ParseComments != 0,
+		breakOK:     !t.hasFunction("break"),
+		continueOK:  !t.hasFunction("continue"),
+	}
 	t.startParse(funcs, lexer, treeSet)
 	t.text = text
 	t.parse()
diff --git a/src/text/template/parse/parse_test.go b/src/text/template/parse/parse_test.go
index fdb25d78f5..f6a9fdd872 100644
--- a/src/text/template/parse/parse_test.go
+++ b/src/text/template/parse/parse_test.go
@@ -489,7 +489,7 @@ var errorTests = []parseTest{
 		hasError, `unclosed left paren`},
 	{"rparen",
 		"{{.X 1 2 3 ) }}",
-		hasError, `unexpected ")" in command`},
+		hasError, "unexpected right paren"},
 	{"rparen2",
 		"{{(.X 1 2 3",
 		hasError, `unclosed action`},
@@ -597,7 +597,8 @@ func TestBlock(t *testing.T) {
 }
 
 func TestLineNum(t *testing.T) {
-	const count = 100
+	// const count = 100
+	const count = 3
 	text := strings.Repeat("{{printf 1234}}\n", count)
 	tree, err := New("bench").Parse(text, "", "", make(map[string]*Tree), builtins)
 	if err != nil {
@@ -611,11 +612,11 @@ func TestLineNum(t *testing.T) {
 		// Action first.
 		action := nodes[i].(*ActionNode)
 		if action.Line != line {
-			t.Fatalf("line %d: action is line %d", line, action.Line)
+			t.Errorf("line %d: action is line %d", line, action.Line)
 		}
 		pipe := action.Pipe
 		if pipe.Line != line {
-			t.Fatalf("line %d: pipe is line %d", line, pipe.Line)
+			t.Errorf("line %d: pipe is line %d", line, pipe.Line)
 		}
 	}
 }