From 36760ca9fd3eaf2c79efef48e533b66da6c542bd Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Thu, 16 Jun 2022 17:35:05 +1000 Subject: [PATCH] text/template/parse: simplify I/O in lexing The concurrent model for delivering tokens was fine for pedagogy, but has caused a few problems as the package has evolved (that is, got more complicated). It's easy to eliminate it, simplifying or removing some of the hacks used to work around these prolems. The old lexer would deliver tokens over a channel to the parsing goroutine, and continue running until EOF. In this rewrite, we instead run the machine until a token is ready, and shut it down until the next token is needed. The mechanism is just to return nil as the state function, which requires a bit more threading of return values through the state functions but is not difficult. The change is modest. A couple of error messages change, but otherwise the change has no external effect. This is just an internal cleanup, long overdue. benchmark old ns/op new ns/op delta BenchmarkParseLarge-20 12222729 6769966 -44.61% BenchmarkVariableString-20 73.5 73.4 -0.16% BenchmarkListString-20 1827 1841 +0.77% benchmark old allocs new allocs delta BenchmarkVariableString-20 3 3 +0.00% BenchmarkListString-20 31 31 +0.00% benchmark old bytes new bytes delta BenchmarkVariableString-20 72 72 +0.00% BenchmarkListString-20 1473 1473 +0.00% Fixes #53261 Change-Id: I4133bed2f8df16d398b707fb9509230325765c57 Reviewed-on: https://go-review.googlesource.com/c/go/+/421883 Reviewed-by: Austin Clements Reviewed-by: Russ Cox --- src/text/template/parse/lex.go | 221 +++++++++++++------------- src/text/template/parse/lex_test.go | 28 ++-- src/text/template/parse/parse.go | 11 +- src/text/template/parse/parse_test.go | 9 +- 4 files changed, 134 insertions(+), 135 deletions(-) diff --git a/src/text/template/parse/lex.go b/src/text/template/parse/lex.go index 29403dd947..3562e0abc9 100644 --- a/src/text/template/parse/lex.go +++ b/src/text/template/parse/lex.go @@ -111,20 +111,26 @@ type stateFn func(*lexer) stateFn // lexer holds the state of the scanner. type lexer struct { - name string // the name of the input; used only for error reports - input string // the string being scanned - leftDelim string // start of action - rightDelim string // end of action - emitComment bool // emit itemComment tokens. - pos Pos // current position in the input - start Pos // start position of this item - atEOF bool // we have hit the end of input and returned eof - items chan item // channel of scanned items - parenDepth int // nesting depth of ( ) exprs - line int // 1+number of newlines seen - startLine int // start line of this item - breakOK bool // break keyword allowed - continueOK bool // continue keyword allowed + name string // the name of the input; used only for error reports + input string // the string being scanned + leftDelim string // start of action marker + rightDelim string // end of action marker + pos Pos // current position in the input + start Pos // start position of this item + atEOF bool // we have hit the end of input and returned eof + parenDepth int // nesting depth of ( ) exprs + line int // 1+number of newlines seen + startLine int // start line of this item + item item // item to return to parser + insideAction bool // are we inside an action? + options lexOptions +} + +// lexOptions control behavior of the lexer. All default to false. +type lexOptions struct { + emitComment bool // emit itemComment tokens. + breakOK bool // break keyword allowed + continueOK bool // continue keyword allowed } // next returns the next rune in the input. @@ -160,14 +166,29 @@ func (l *lexer) backup() { } } -// emit passes an item back to the client. -func (l *lexer) emit(t itemType) { - l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine} +// thisItem returns the item at the current input point with the specified type +// and advances the input. +func (l *lexer) thisItem(t itemType) item { + i := item{t, l.start, l.input[l.start:l.pos], l.startLine} l.start = l.pos l.startLine = l.line + return i +} + +// emit passes the trailing text as an item back to the parser. +func (l *lexer) emit(t itemType) stateFn { + return l.emitItem(l.thisItem(t)) +} + +// emitItem passes the specified item to the parser. +func (l *lexer) emitItem(i item) stateFn { + l.item = i + return nil } // ignore skips over the pending input before this point. +// It tracks newlines in the ignored text, so use it only +// for text that is skipped without calling l.next. func (l *lexer) ignore() { l.line += strings.Count(l.input[l.start:l.pos], "\n") l.start = l.pos @@ -193,25 +214,31 @@ func (l *lexer) acceptRun(valid string) { // errorf returns an error token and terminates the scan by passing // back a nil pointer that will be the next state, terminating l.nextItem. func (l *lexer) errorf(format string, args ...any) stateFn { - l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine} + l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine} + l.start = 0 + l.pos = 0 + l.input = l.input[:0] return nil } // nextItem returns the next item from the input. // Called by the parser, not in the lexing goroutine. func (l *lexer) nextItem() item { - return <-l.items -} - -// drain drains the output so the lexing goroutine will exit. -// Called by the parser, not in the lexing goroutine. -func (l *lexer) drain() { - for range l.items { + l.item = item{itemEOF, l.pos, "EOF", l.startLine} + state := lexText + if l.insideAction { + state = lexInsideAction + } + for { + state = state(l) + if state == nil { + return l.item + } } } // lex creates a new scanner for the input string. -func lex(name, input, left, right string, emitComment, breakOK, continueOK bool) *lexer { +func lex(name, input, left, right string) *lexer { if left == "" { left = leftDelim } @@ -219,29 +246,17 @@ func lex(name, input, left, right string, emitComment, breakOK, continueOK bool) right = rightDelim } l := &lexer{ - name: name, - input: input, - leftDelim: left, - rightDelim: right, - emitComment: emitComment, - breakOK: breakOK, - continueOK: continueOK, - items: make(chan item), - line: 1, - startLine: 1, + name: name, + input: input, + leftDelim: left, + rightDelim: right, + line: 1, + startLine: 1, + insideAction: false, } - go l.run() return l } -// run runs the state machine for the lexer. -func (l *lexer) run() { - for state := lexText; state != nil; { - state = state(l) - } - close(l.items) -} - // state functions const ( @@ -254,29 +269,32 @@ const ( // lexText scans until an opening action delimiter, "{{". func lexText(l *lexer) stateFn { if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 { - ldn := Pos(len(l.leftDelim)) - l.pos += Pos(x) - trimLength := Pos(0) - if hasLeftTrimMarker(l.input[l.pos+ldn:]) { - trimLength = rightTrimLength(l.input[l.start:l.pos]) - } - l.pos -= trimLength - if l.pos > l.start { + if x > 0 { + l.pos += Pos(x) + // Do we trim any trailing space? + trimLength := Pos(0) + delimEnd := l.pos + Pos(len(l.leftDelim)) + if hasLeftTrimMarker(l.input[delimEnd:]) { + trimLength = rightTrimLength(l.input[l.start:l.pos]) + } + l.pos -= trimLength l.line += strings.Count(l.input[l.start:l.pos], "\n") - l.emit(itemText) + i := l.thisItem(itemText) + l.pos += trimLength + l.ignore() + if len(i.val) > 0 { + return l.emitItem(i) + } } - l.pos += trimLength - l.ignore() return lexLeftDelim } l.pos = Pos(len(l.input)) // Correctly reached EOF. if l.pos > l.start { l.line += strings.Count(l.input[l.start:l.pos], "\n") - l.emit(itemText) + return l.emit(itemText) } - l.emit(itemEOF) - return nil + return l.emit(itemEOF) } // rightTrimLength returns the length of the spaces at the end of the string. @@ -301,6 +319,7 @@ func leftTrimLength(s string) Pos { } // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker. +// (The text to be trimmed has already been emitted.) func lexLeftDelim(l *lexer) stateFn { l.pos += Pos(len(l.leftDelim)) trimSpace := hasLeftTrimMarker(l.input[l.pos:]) @@ -313,28 +332,27 @@ func lexLeftDelim(l *lexer) stateFn { l.ignore() return lexComment } - l.emit(itemLeftDelim) + i := l.thisItem(itemLeftDelim) + l.insideAction = true l.pos += afterMarker l.ignore() l.parenDepth = 0 - return lexInsideAction + return l.emitItem(i) } // lexComment scans a comment. The left comment marker is known to be present. func lexComment(l *lexer) stateFn { l.pos += Pos(len(leftComment)) - i := strings.Index(l.input[l.pos:], rightComment) - if i < 0 { + x := strings.Index(l.input[l.pos:], rightComment) + if x < 0 { return l.errorf("unclosed comment") } - l.pos += Pos(i + len(rightComment)) + l.pos += Pos(x + len(rightComment)) delim, trimSpace := l.atRightDelim() if !delim { return l.errorf("comment ends before closing delimiter") } - if l.emitComment { - l.emit(itemComment) - } + i := l.thisItem(itemComment) if trimSpace { l.pos += trimMarkerLen } @@ -343,6 +361,9 @@ func lexComment(l *lexer) stateFn { l.pos += leftTrimLength(l.input[l.pos:]) } l.ignore() + if l.options.emitComment { + return l.emitItem(i) + } return lexText } @@ -354,12 +375,13 @@ func lexRightDelim(l *lexer) stateFn { l.ignore() } l.pos += Pos(len(l.rightDelim)) - l.emit(itemRightDelim) + i := l.thisItem(itemRightDelim) if trimSpace { l.pos += leftTrimLength(l.input[l.pos:]) l.ignore() } - return lexText + l.insideAction = false + return l.emitItem(i) } // lexInsideAction scans the elements inside action delimiters. @@ -381,14 +403,14 @@ func lexInsideAction(l *lexer) stateFn { l.backup() // Put space back in case we have " -}}". return lexSpace case r == '=': - l.emit(itemAssign) + return l.emit(itemAssign) case r == ':': if l.next() != '=' { return l.errorf("expected :=") } - l.emit(itemDeclare) + return l.emit(itemDeclare) case r == '|': - l.emit(itemPipe) + return l.emit(itemPipe) case r == '"': return lexQuote case r == '`': @@ -413,20 +435,19 @@ func lexInsideAction(l *lexer) stateFn { l.backup() return lexIdentifier case r == '(': - l.emit(itemLeftParen) l.parenDepth++ + return l.emit(itemLeftParen) case r == ')': - l.emit(itemRightParen) l.parenDepth-- if l.parenDepth < 0 { - return l.errorf("unexpected right paren %#U", r) + return l.errorf("unexpected right paren") } + return l.emit(itemRightParen) case r <= unicode.MaxASCII && unicode.IsPrint(r): - l.emit(itemChar) + return l.emit(itemChar) default: return l.errorf("unrecognized character in action: %#U", r) } - return lexInsideAction } // lexSpace scans a run of space characters. @@ -451,13 +472,11 @@ func lexSpace(l *lexer) stateFn { return lexRightDelim // On the delim, so go right to that. } } - l.emit(itemSpace) - return lexInsideAction + return l.emit(itemSpace) } // lexIdentifier scans an alphanumeric. func lexIdentifier(l *lexer) stateFn { -Loop: for { switch r := l.next(); { case isAlphaNumeric(r): @@ -471,22 +490,19 @@ Loop: switch { case key[word] > itemKeyword: item := key[word] - if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK { - l.emit(itemIdentifier) - } else { - l.emit(item) + if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK { + return l.emit(itemIdentifier) } + return l.emit(item) case word[0] == '.': - l.emit(itemField) + return l.emit(itemField) case word == "true", word == "false": - l.emit(itemBool) + return l.emit(itemBool) default: - l.emit(itemIdentifier) + return l.emit(itemIdentifier) } - break Loop } } - return lexInsideAction } // lexField scans a field: .Alphanumeric. @@ -499,8 +515,7 @@ func lexField(l *lexer) stateFn { // The $ has been scanned. func lexVariable(l *lexer) stateFn { if l.atTerminator() { // Nothing interesting follows -> "$". - l.emit(itemVariable) - return lexInsideAction + return l.emit(itemVariable) } return lexFieldOrVariable(l, itemVariable) } @@ -510,11 +525,9 @@ func lexVariable(l *lexer) stateFn { func lexFieldOrVariable(l *lexer, typ itemType) stateFn { if l.atTerminator() { // Nothing interesting follows -> "." or "$". if typ == itemVariable { - l.emit(itemVariable) - } else { - l.emit(itemDot) + return l.emit(itemVariable) } - return lexInsideAction + return l.emit(itemDot) } var r rune for { @@ -527,8 +540,7 @@ func lexFieldOrVariable(l *lexer, typ itemType) stateFn { if !l.atTerminator() { return l.errorf("bad character %#U", r) } - l.emit(typ) - return lexInsideAction + return l.emit(typ) } // atTerminator reports whether the input is at valid termination character to @@ -564,8 +576,7 @@ Loop: break Loop } } - l.emit(itemCharConstant) - return lexInsideAction + return l.emit(itemCharConstant) } // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This @@ -581,11 +592,9 @@ func lexNumber(l *lexer) stateFn { if !l.scanNumber() || l.input[l.pos-1] != 'i' { return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) } - l.emit(itemComplex) - } else { - l.emit(itemNumber) + return l.emit(itemComplex) } - return lexInsideAction + return l.emit(itemNumber) } func (l *lexer) scanNumber() bool { @@ -641,8 +650,7 @@ Loop: break Loop } } - l.emit(itemString) - return lexInsideAction + return l.emit(itemString) } // lexRawQuote scans a raw quoted string. @@ -656,8 +664,7 @@ Loop: break Loop } } - l.emit(itemRawString) - return lexInsideAction + return l.emit(itemRawString) } // isSpace reports whether r is a space character. diff --git a/src/text/template/parse/lex_test.go b/src/text/template/parse/lex_test.go index c5f429667c..947889a80b 100644 --- a/src/text/template/parse/lex_test.go +++ b/src/text/template/parse/lex_test.go @@ -359,8 +359,7 @@ var lexTests = []lexTest{ {"extra right paren", "{{3)}}", []item{ tLeft, mkItem(itemNumber, "3"), - tRpar, - mkItem(itemError, `unexpected right paren U+0029 ')'`), + mkItem(itemError, "unexpected right paren"), }}, // Fixed bugs @@ -394,7 +393,12 @@ var lexTests = []lexTest{ // collect gathers the emitted items into a slice. func collect(t *lexTest, left, right string) (items []item) { - l := lex(t.name, t.input, left, right, true, true, true) + l := lex(t.name, t.input, left, right) + l.options = lexOptions{ + emitComment: true, + breakOK: true, + continueOK: true, + } for { item := l.nextItem() items = append(items, item) @@ -431,7 +435,9 @@ func TestLex(t *testing.T) { items := collect(&test, "", "") if !equal(items, test.items, false) { t.Errorf("%s: got\n\t%+v\nexpected\n\t%v", test.name, items, test.items) + return // TODO } + t.Log(test.name, "OK") } } @@ -546,22 +552,6 @@ func TestPos(t *testing.T) { } } -// Test that an error shuts down the lexing goroutine. -func TestShutdown(t *testing.T) { - // We need to duplicate template.Parse here to hold on to the lexer. - const text = "erroneous{{define}}{{else}}1234" - lexer := lex("foo", text, "{{", "}}", false, true, true) - _, err := New("root").parseLexer(lexer) - if err == nil { - t.Fatalf("expected error") - } - // The error should have drained the input. Therefore, the lexer should be shut down. - token, ok := <-lexer.items - if ok { - t.Fatalf("input was not drained; got %v", token) - } -} - // parseLexer is a local version of parse that lets us pass in the lexer instead of building it. // We expect an error, so the tree set and funcs list are explicitly nil. func (t *Tree) parseLexer(lex *lexer) (tree *Tree, err error) { diff --git a/src/text/template/parse/parse.go b/src/text/template/parse/parse.go index 00c258ad5d..87b7618f75 100644 --- a/src/text/template/parse/parse.go +++ b/src/text/template/parse/parse.go @@ -210,7 +210,6 @@ func (t *Tree) recover(errp *error) { panic(e) } if t != nil { - t.lex.drain() t.stopParse() } *errp = e.(error) @@ -241,10 +240,12 @@ func (t *Tree) stopParse() { func (t *Tree) Parse(text, leftDelim, rightDelim string, treeSet map[string]*Tree, funcs ...map[string]any) (tree *Tree, err error) { defer t.recover(&err) t.ParseName = t.Name - emitComment := t.Mode&ParseComments != 0 - breakOK := !t.hasFunction("break") - continueOK := !t.hasFunction("continue") - lexer := lex(t.Name, text, leftDelim, rightDelim, emitComment, breakOK, continueOK) + lexer := lex(t.Name, text, leftDelim, rightDelim) + lexer.options = lexOptions{ + emitComment: t.Mode&ParseComments != 0, + breakOK: !t.hasFunction("break"), + continueOK: !t.hasFunction("continue"), + } t.startParse(funcs, lexer, treeSet) t.text = text t.parse() diff --git a/src/text/template/parse/parse_test.go b/src/text/template/parse/parse_test.go index fdb25d78f5..f6a9fdd872 100644 --- a/src/text/template/parse/parse_test.go +++ b/src/text/template/parse/parse_test.go @@ -489,7 +489,7 @@ var errorTests = []parseTest{ hasError, `unclosed left paren`}, {"rparen", "{{.X 1 2 3 ) }}", - hasError, `unexpected ")" in command`}, + hasError, "unexpected right paren"}, {"rparen2", "{{(.X 1 2 3", hasError, `unclosed action`}, @@ -597,7 +597,8 @@ func TestBlock(t *testing.T) { } func TestLineNum(t *testing.T) { - const count = 100 + // const count = 100 + const count = 3 text := strings.Repeat("{{printf 1234}}\n", count) tree, err := New("bench").Parse(text, "", "", make(map[string]*Tree), builtins) if err != nil { @@ -611,11 +612,11 @@ func TestLineNum(t *testing.T) { // Action first. action := nodes[i].(*ActionNode) if action.Line != line { - t.Fatalf("line %d: action is line %d", line, action.Line) + t.Errorf("line %d: action is line %d", line, action.Line) } pipe := action.Pipe if pipe.Line != line { - t.Fatalf("line %d: pipe is line %d", line, pipe.Line) + t.Errorf("line %d: pipe is line %d", line, pipe.Line) } } }