text/template/parse: simplify I/O in lexing

The concurrent model for delivering tokens was fine for pedagogy,
but has caused a few problems as the package has evolved (that is,
got more complicated). It's easy to eliminate it, simplifying or
removing some of the hacks used to work around these prolems.

The old lexer would deliver tokens over a channel to the parsing
goroutine, and continue running until EOF. In this rewrite, we
instead run the machine until a token is ready, and shut it down
until the next token is needed. The mechanism is just to return nil
as the state function, which requires a bit more threading of return
values through the state functions but is not difficult. The change
is modest.

A couple of error messages change, but otherwise the change has no
external effect. This is just an internal cleanup, long overdue.

benchmark                      old ns/op     new ns/op     delta
BenchmarkParseLarge-20         12222729      6769966       -44.61%
BenchmarkVariableString-20     73.5          73.4          -0.16%
BenchmarkListString-20         1827          1841          +0.77%

benchmark                      old allocs     new allocs     delta
BenchmarkVariableString-20     3              3              +0.00%
BenchmarkListString-20         31             31             +0.00%

benchmark                      old bytes     new bytes     delta
BenchmarkVariableString-20     72            72            +0.00%
BenchmarkListString-20         1473          1473          +0.00%

Fixes #53261

Change-Id: I4133bed2f8df16d398b707fb9509230325765c57
Reviewed-on: https://go-review.googlesource.com/c/go/+/421883
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
Rob Pike 2022-06-16 17:35:05 +10:00
parent 3a067b288e
commit 36760ca9fd
4 changed files with 134 additions and 135 deletions

View file

@ -111,20 +111,26 @@ type stateFn func(*lexer) stateFn
// lexer holds the state of the scanner.
type lexer struct {
name string // the name of the input; used only for error reports
input string // the string being scanned
leftDelim string // start of action
rightDelim string // end of action
emitComment bool // emit itemComment tokens.
pos Pos // current position in the input
start Pos // start position of this item
atEOF bool // we have hit the end of input and returned eof
items chan item // channel of scanned items
parenDepth int // nesting depth of ( ) exprs
line int // 1+number of newlines seen
startLine int // start line of this item
breakOK bool // break keyword allowed
continueOK bool // continue keyword allowed
name string // the name of the input; used only for error reports
input string // the string being scanned
leftDelim string // start of action marker
rightDelim string // end of action marker
pos Pos // current position in the input
start Pos // start position of this item
atEOF bool // we have hit the end of input and returned eof
parenDepth int // nesting depth of ( ) exprs
line int // 1+number of newlines seen
startLine int // start line of this item
item item // item to return to parser
insideAction bool // are we inside an action?
options lexOptions
}
// lexOptions control behavior of the lexer. All default to false.
type lexOptions struct {
emitComment bool // emit itemComment tokens.
breakOK bool // break keyword allowed
continueOK bool // continue keyword allowed
}
// next returns the next rune in the input.
@ -160,14 +166,29 @@ func (l *lexer) backup() {
}
}
// emit passes an item back to the client.
func (l *lexer) emit(t itemType) {
l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
// thisItem returns the item at the current input point with the specified type
// and advances the input.
func (l *lexer) thisItem(t itemType) item {
i := item{t, l.start, l.input[l.start:l.pos], l.startLine}
l.start = l.pos
l.startLine = l.line
return i
}
// emit passes the trailing text as an item back to the parser.
func (l *lexer) emit(t itemType) stateFn {
return l.emitItem(l.thisItem(t))
}
// emitItem passes the specified item to the parser.
func (l *lexer) emitItem(i item) stateFn {
l.item = i
return nil
}
// ignore skips over the pending input before this point.
// It tracks newlines in the ignored text, so use it only
// for text that is skipped without calling l.next.
func (l *lexer) ignore() {
l.line += strings.Count(l.input[l.start:l.pos], "\n")
l.start = l.pos
@ -193,25 +214,31 @@ func (l *lexer) acceptRun(valid string) {
// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...any) stateFn {
l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
l.start = 0
l.pos = 0
l.input = l.input[:0]
return nil
}
// nextItem returns the next item from the input.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) nextItem() item {
return <-l.items
}
// drain drains the output so the lexing goroutine will exit.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) drain() {
for range l.items {
l.item = item{itemEOF, l.pos, "EOF", l.startLine}
state := lexText
if l.insideAction {
state = lexInsideAction
}
for {
state = state(l)
if state == nil {
return l.item
}
}
}
// lex creates a new scanner for the input string.
func lex(name, input, left, right string, emitComment, breakOK, continueOK bool) *lexer {
func lex(name, input, left, right string) *lexer {
if left == "" {
left = leftDelim
}
@ -219,29 +246,17 @@ func lex(name, input, left, right string, emitComment, breakOK, continueOK bool)
right = rightDelim
}
l := &lexer{
name: name,
input: input,
leftDelim: left,
rightDelim: right,
emitComment: emitComment,
breakOK: breakOK,
continueOK: continueOK,
items: make(chan item),
line: 1,
startLine: 1,
name: name,
input: input,
leftDelim: left,
rightDelim: right,
line: 1,
startLine: 1,
insideAction: false,
}
go l.run()
return l
}
// run runs the state machine for the lexer.
func (l *lexer) run() {
for state := lexText; state != nil; {
state = state(l)
}
close(l.items)
}
// state functions
const (
@ -254,29 +269,32 @@ const (
// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
ldn := Pos(len(l.leftDelim))
l.pos += Pos(x)
trimLength := Pos(0)
if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
trimLength = rightTrimLength(l.input[l.start:l.pos])
}
l.pos -= trimLength
if l.pos > l.start {
if x > 0 {
l.pos += Pos(x)
// Do we trim any trailing space?
trimLength := Pos(0)
delimEnd := l.pos + Pos(len(l.leftDelim))
if hasLeftTrimMarker(l.input[delimEnd:]) {
trimLength = rightTrimLength(l.input[l.start:l.pos])
}
l.pos -= trimLength
l.line += strings.Count(l.input[l.start:l.pos], "\n")
l.emit(itemText)
i := l.thisItem(itemText)
l.pos += trimLength
l.ignore()
if len(i.val) > 0 {
return l.emitItem(i)
}
}
l.pos += trimLength
l.ignore()
return lexLeftDelim
}
l.pos = Pos(len(l.input))
// Correctly reached EOF.
if l.pos > l.start {
l.line += strings.Count(l.input[l.start:l.pos], "\n")
l.emit(itemText)
return l.emit(itemText)
}
l.emit(itemEOF)
return nil
return l.emit(itemEOF)
}
// rightTrimLength returns the length of the spaces at the end of the string.
@ -301,6 +319,7 @@ func leftTrimLength(s string) Pos {
}
// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
// (The text to be trimmed has already been emitted.)
func lexLeftDelim(l *lexer) stateFn {
l.pos += Pos(len(l.leftDelim))
trimSpace := hasLeftTrimMarker(l.input[l.pos:])
@ -313,28 +332,27 @@ func lexLeftDelim(l *lexer) stateFn {
l.ignore()
return lexComment
}
l.emit(itemLeftDelim)
i := l.thisItem(itemLeftDelim)
l.insideAction = true
l.pos += afterMarker
l.ignore()
l.parenDepth = 0
return lexInsideAction
return l.emitItem(i)
}
// lexComment scans a comment. The left comment marker is known to be present.
func lexComment(l *lexer) stateFn {
l.pos += Pos(len(leftComment))
i := strings.Index(l.input[l.pos:], rightComment)
if i < 0 {
x := strings.Index(l.input[l.pos:], rightComment)
if x < 0 {
return l.errorf("unclosed comment")
}
l.pos += Pos(i + len(rightComment))
l.pos += Pos(x + len(rightComment))
delim, trimSpace := l.atRightDelim()
if !delim {
return l.errorf("comment ends before closing delimiter")
}
if l.emitComment {
l.emit(itemComment)
}
i := l.thisItem(itemComment)
if trimSpace {
l.pos += trimMarkerLen
}
@ -343,6 +361,9 @@ func lexComment(l *lexer) stateFn {
l.pos += leftTrimLength(l.input[l.pos:])
}
l.ignore()
if l.options.emitComment {
return l.emitItem(i)
}
return lexText
}
@ -354,12 +375,13 @@ func lexRightDelim(l *lexer) stateFn {
l.ignore()
}
l.pos += Pos(len(l.rightDelim))
l.emit(itemRightDelim)
i := l.thisItem(itemRightDelim)
if trimSpace {
l.pos += leftTrimLength(l.input[l.pos:])
l.ignore()
}
return lexText
l.insideAction = false
return l.emitItem(i)
}
// lexInsideAction scans the elements inside action delimiters.
@ -381,14 +403,14 @@ func lexInsideAction(l *lexer) stateFn {
l.backup() // Put space back in case we have " -}}".
return lexSpace
case r == '=':
l.emit(itemAssign)
return l.emit(itemAssign)
case r == ':':
if l.next() != '=' {
return l.errorf("expected :=")
}
l.emit(itemDeclare)
return l.emit(itemDeclare)
case r == '|':
l.emit(itemPipe)
return l.emit(itemPipe)
case r == '"':
return lexQuote
case r == '`':
@ -413,20 +435,19 @@ func lexInsideAction(l *lexer) stateFn {
l.backup()
return lexIdentifier
case r == '(':
l.emit(itemLeftParen)
l.parenDepth++
return l.emit(itemLeftParen)
case r == ')':
l.emit(itemRightParen)
l.parenDepth--
if l.parenDepth < 0 {
return l.errorf("unexpected right paren %#U", r)
return l.errorf("unexpected right paren")
}
return l.emit(itemRightParen)
case r <= unicode.MaxASCII && unicode.IsPrint(r):
l.emit(itemChar)
return l.emit(itemChar)
default:
return l.errorf("unrecognized character in action: %#U", r)
}
return lexInsideAction
}
// lexSpace scans a run of space characters.
@ -451,13 +472,11 @@ func lexSpace(l *lexer) stateFn {
return lexRightDelim // On the delim, so go right to that.
}
}
l.emit(itemSpace)
return lexInsideAction
return l.emit(itemSpace)
}
// lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn {
Loop:
for {
switch r := l.next(); {
case isAlphaNumeric(r):
@ -471,22 +490,19 @@ Loop:
switch {
case key[word] > itemKeyword:
item := key[word]
if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK {
l.emit(itemIdentifier)
} else {
l.emit(item)
if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK {
return l.emit(itemIdentifier)
}
return l.emit(item)
case word[0] == '.':
l.emit(itemField)
return l.emit(itemField)
case word == "true", word == "false":
l.emit(itemBool)
return l.emit(itemBool)
default:
l.emit(itemIdentifier)
return l.emit(itemIdentifier)
}
break Loop
}
}
return lexInsideAction
}
// lexField scans a field: .Alphanumeric.
@ -499,8 +515,7 @@ func lexField(l *lexer) stateFn {
// The $ has been scanned.
func lexVariable(l *lexer) stateFn {
if l.atTerminator() { // Nothing interesting follows -> "$".
l.emit(itemVariable)
return lexInsideAction
return l.emit(itemVariable)
}
return lexFieldOrVariable(l, itemVariable)
}
@ -510,11 +525,9 @@ func lexVariable(l *lexer) stateFn {
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
if l.atTerminator() { // Nothing interesting follows -> "." or "$".
if typ == itemVariable {
l.emit(itemVariable)
} else {
l.emit(itemDot)
return l.emit(itemVariable)
}
return lexInsideAction
return l.emit(itemDot)
}
var r rune
for {
@ -527,8 +540,7 @@ func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
if !l.atTerminator() {
return l.errorf("bad character %#U", r)
}
l.emit(typ)
return lexInsideAction
return l.emit(typ)
}
// atTerminator reports whether the input is at valid termination character to
@ -564,8 +576,7 @@ Loop:
break Loop
}
}
l.emit(itemCharConstant)
return lexInsideAction
return l.emit(itemCharConstant)
}
// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
@ -581,11 +592,9 @@ func lexNumber(l *lexer) stateFn {
if !l.scanNumber() || l.input[l.pos-1] != 'i' {
return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
}
l.emit(itemComplex)
} else {
l.emit(itemNumber)
return l.emit(itemComplex)
}
return lexInsideAction
return l.emit(itemNumber)
}
func (l *lexer) scanNumber() bool {
@ -641,8 +650,7 @@ Loop:
break Loop
}
}
l.emit(itemString)
return lexInsideAction
return l.emit(itemString)
}
// lexRawQuote scans a raw quoted string.
@ -656,8 +664,7 @@ Loop:
break Loop
}
}
l.emit(itemRawString)
return lexInsideAction
return l.emit(itemRawString)
}
// isSpace reports whether r is a space character.

View file

@ -359,8 +359,7 @@ var lexTests = []lexTest{
{"extra right paren", "{{3)}}", []item{
tLeft,
mkItem(itemNumber, "3"),
tRpar,
mkItem(itemError, `unexpected right paren U+0029 ')'`),
mkItem(itemError, "unexpected right paren"),
}},
// Fixed bugs
@ -394,7 +393,12 @@ var lexTests = []lexTest{
// collect gathers the emitted items into a slice.
func collect(t *lexTest, left, right string) (items []item) {
l := lex(t.name, t.input, left, right, true, true, true)
l := lex(t.name, t.input, left, right)
l.options = lexOptions{
emitComment: true,
breakOK: true,
continueOK: true,
}
for {
item := l.nextItem()
items = append(items, item)
@ -431,7 +435,9 @@ func TestLex(t *testing.T) {
items := collect(&test, "", "")
if !equal(items, test.items, false) {
t.Errorf("%s: got\n\t%+v\nexpected\n\t%v", test.name, items, test.items)
return // TODO
}
t.Log(test.name, "OK")
}
}
@ -546,22 +552,6 @@ func TestPos(t *testing.T) {
}
}
// Test that an error shuts down the lexing goroutine.
func TestShutdown(t *testing.T) {
// We need to duplicate template.Parse here to hold on to the lexer.
const text = "erroneous{{define}}{{else}}1234"
lexer := lex("foo", text, "{{", "}}", false, true, true)
_, err := New("root").parseLexer(lexer)
if err == nil {
t.Fatalf("expected error")
}
// The error should have drained the input. Therefore, the lexer should be shut down.
token, ok := <-lexer.items
if ok {
t.Fatalf("input was not drained; got %v", token)
}
}
// parseLexer is a local version of parse that lets us pass in the lexer instead of building it.
// We expect an error, so the tree set and funcs list are explicitly nil.
func (t *Tree) parseLexer(lex *lexer) (tree *Tree, err error) {

View file

@ -210,7 +210,6 @@ func (t *Tree) recover(errp *error) {
panic(e)
}
if t != nil {
t.lex.drain()
t.stopParse()
}
*errp = e.(error)
@ -241,10 +240,12 @@ func (t *Tree) stopParse() {
func (t *Tree) Parse(text, leftDelim, rightDelim string, treeSet map[string]*Tree, funcs ...map[string]any) (tree *Tree, err error) {
defer t.recover(&err)
t.ParseName = t.Name
emitComment := t.Mode&ParseComments != 0
breakOK := !t.hasFunction("break")
continueOK := !t.hasFunction("continue")
lexer := lex(t.Name, text, leftDelim, rightDelim, emitComment, breakOK, continueOK)
lexer := lex(t.Name, text, leftDelim, rightDelim)
lexer.options = lexOptions{
emitComment: t.Mode&ParseComments != 0,
breakOK: !t.hasFunction("break"),
continueOK: !t.hasFunction("continue"),
}
t.startParse(funcs, lexer, treeSet)
t.text = text
t.parse()

View file

@ -489,7 +489,7 @@ var errorTests = []parseTest{
hasError, `unclosed left paren`},
{"rparen",
"{{.X 1 2 3 ) }}",
hasError, `unexpected ")" in command`},
hasError, "unexpected right paren"},
{"rparen2",
"{{(.X 1 2 3",
hasError, `unclosed action`},
@ -597,7 +597,8 @@ func TestBlock(t *testing.T) {
}
func TestLineNum(t *testing.T) {
const count = 100
// const count = 100
const count = 3
text := strings.Repeat("{{printf 1234}}\n", count)
tree, err := New("bench").Parse(text, "", "", make(map[string]*Tree), builtins)
if err != nil {
@ -611,11 +612,11 @@ func TestLineNum(t *testing.T) {
// Action first.
action := nodes[i].(*ActionNode)
if action.Line != line {
t.Fatalf("line %d: action is line %d", line, action.Line)
t.Errorf("line %d: action is line %d", line, action.Line)
}
pipe := action.Pipe
if pipe.Line != line {
t.Fatalf("line %d: pipe is line %d", line, pipe.Line)
t.Errorf("line %d: pipe is line %d", line, pipe.Line)
}
}
}