diff --git a/src/go/parser/interface.go b/src/go/parser/interface.go
index e3468f481f..d911c8e1d0 100644
--- a/src/go/parser/interface.go
+++ b/src/go/parser/interface.go
@@ -94,8 +94,11 @@ func ParseFile(fset *token.FileSet, filename string, src any, mode Mode) (f *ast
 	defer func() {
 		if e := recover(); e != nil {
 			// resume same panic if it's not a bailout
-			if _, ok := e.(bailout); !ok {
+			bail, ok := e.(bailout)
+			if !ok {
 				panic(e)
+			} else if bail.msg != "" {
+				p.errors.Add(p.file.Position(bail.pos), bail.msg)
 			}
 		}
@@ -198,8 +201,11 @@ func ParseExprFrom(fset *token.FileSet, filename string, src any, mode Mode) (ex
 	defer func() {
 		if e := recover(); e != nil {
 			// resume same panic if it's not a bailout
-			if _, ok := e.(bailout); !ok {
+			bail, ok := e.(bailout)
+			if !ok {
 				panic(e)
+			} else if bail.msg != "" {
+				p.errors.Add(p.file.Position(bail.pos), bail.msg)
 			}
 		}
 		p.errors.Sort()
diff --git a/src/go/parser/parser.go b/src/go/parser/parser.go
index ca2f24c8b8..d4ad36dc67 100644
--- a/src/go/parser/parser.go
+++ b/src/go/parser/parser.go
@@ -59,6 +59,10 @@ type parser struct {
 	inRhs bool // if set, the parser is parsing a rhs expression
 
 	imports []*ast.ImportSpec // list of imports
+
+	// nestLev is used to track and limit the recursion depth
+	// during parsing.
+	nestLev int
 }
 
 func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode Mode) {
@@ -108,6 +112,24 @@ func un(p *parser) {
 	p.printTrace(")")
 }
 
+// maxNestLev is the deepest we're willing to recurse during parsing
+const maxNestLev int = 1e5
+
+func incNestLev(p *parser) *parser {
+	p.nestLev++
+	if p.nestLev > maxNestLev {
+		p.error(p.pos, "exceeded max nesting depth")
+		panic(bailout{})
+	}
+	return p
+}
+
+// decNestLev is used to track nesting depth during parsing to prevent stack exhaustion.
+// It is used along with incNestLev in a similar fashion to how un and trace are used.
+func decNestLev(p *parser) {
+	p.nestLev--
+}
+
 // Advance to the next token.
 func (p *parser) next0() {
 	// Because of one-token look-ahead, print the previous token
@@ -218,8 +240,12 @@ func (p *parser) next() {
 	}
 }
 
-// A bailout panic is raised to indicate early termination.
-type bailout struct{}
+// A bailout panic is raised to indicate early termination. pos and msg are
+// only populated when bailing out of object resolution.
+type bailout struct {
+	pos token.Pos
+	msg string
+}
 
 func (p *parser) error(pos token.Pos, msg string) {
 	if p.trace {
@@ -1247,6 +1273,8 @@ func (p *parser) parseTypeInstance(typ ast.Expr) ast.Expr {
 }
 
 func (p *parser) tryIdentOrType() ast.Expr {
+	defer decNestLev(incNestLev(p))
+
 	switch p.tok {
 	case token.IDENT:
 		typ := p.parseTypeName(nil)
@@ -1657,7 +1685,13 @@ func (p *parser) parsePrimaryExpr(x ast.Expr) ast.Expr {
 	if x == nil {
 		x = p.parseOperand()
 	}
-	for {
+	// We track the nesting here rather than at the entry for the function,
+	// since it can iteratively produce a nested output, and we want to
+	// limit how deep a structure we generate.
+	var n int
+	defer func() { p.nestLev -= n }()
+	for n = 1; ; n++ {
+		incNestLev(p)
 		switch p.tok {
 		case token.PERIOD:
 			p.next()
@@ -1717,6 +1751,8 @@ func (p *parser) parsePrimaryExpr(x ast.Expr) ast.Expr {
 }
 
 func (p *parser) parseUnaryExpr() ast.Expr {
+	defer decNestLev(incNestLev(p))
+
 	if p.trace {
 		defer un(trace(p, "UnaryExpr"))
 	}
@@ -1806,7 +1842,13 @@ func (p *parser) parseBinaryExpr(x ast.Expr, prec1 int, check bool) ast.Expr {
 	if x == nil {
 		x = p.parseUnaryExpr()
 	}
-	for {
+	// We track the nesting here rather than at the entry for the function,
+	// since it can iteratively produce a nested output, and we want to
+	// limit how deep a structure we generate.
+	var n int
+	defer func() { p.nestLev -= n }()
+	for n = 1; ; n++ {
+		incNestLev(p)
 		op, oprec := p.tokPrec()
 		if oprec < prec1 {
 			return x
 		}
@@ -2099,6 +2141,8 @@ func (p *parser) parseIfHeader() (init ast.Stmt, cond ast.Expr) {
 }
 
 func (p *parser) parseIfStmt() *ast.IfStmt {
+	defer decNestLev(incNestLev(p))
+
 	if p.trace {
 		defer un(trace(p, "IfStmt"))
 	}
@@ -2402,6 +2446,8 @@ func (p *parser) parseForStmt() ast.Stmt {
 }
 
 func (p *parser) parseStmt() (s ast.Stmt) {
+	defer decNestLev(incNestLev(p))
+
 	if p.trace {
 		defer un(trace(p, "Statement"))
 	}
diff --git a/src/go/parser/parser_test.go b/src/go/parser/parser_test.go
index a4f882d368..1a46c87866 100644
--- a/src/go/parser/parser_test.go
+++ b/src/go/parser/parser_test.go
@@ -10,6 +10,7 @@ import (
 	"go/ast"
 	"go/token"
 	"io/fs"
+	"runtime"
 	"strings"
 	"testing"
 )
@@ -577,3 +578,171 @@ type x int // comment
 		t.Errorf("got %q, want %q", comment, "// comment")
 	}
 }
+
+var parseDepthTests = []struct {
+	name   string
+	format string
+	// parseMultiplier is used when a single statement may result in more than one
+	// change in the depth level, for instance "1+(..." produces a BinaryExpr
+	// followed by a UnaryExpr, which increments the depth twice. The test
+	// case comment explains which nodes are triggering the multiple depth
+	// changes.
+	parseMultiplier int
+	// scope is true if we should also test the statement for the resolver scope
+	// depth limit.
+	scope bool
+	// scopeMultiplier does the same as parseMultiplier, but for the scope
+	// depths.
+	scopeMultiplier int
+}{
+	// The format expands the part inside « » many times.
+	// A second set of brackets nested inside the first stops the repetition,
+	// so that for example «(«1»)» expands to (((...((((1))))...))).
+ {name: "array", format: "package main; var x «[1]»int"}, + {name: "slice", format: "package main; var x «[]»int"}, + {name: "struct", format: "package main; var x «struct { X «int» }»", scope: true}, + {name: "pointer", format: "package main; var x «*»int"}, + {name: "func", format: "package main; var x «func()»int", scope: true}, + {name: "chan", format: "package main; var x «chan »int"}, + {name: "chan2", format: "package main; var x «<-chan »int"}, + {name: "interface", format: "package main; var x «interface { M() «int» }»", scope: true, scopeMultiplier: 2}, // Scopes: InterfaceType, FuncType + {name: "map", format: "package main; var x «map[int]»int"}, + {name: "slicelit", format: "package main; var x = «[]any{«»}»", parseMultiplier: 2}, // Parser nodes: UnaryExpr, CompositeLit + {name: "arraylit", format: "package main; var x = «[1]any{«nil»}»", parseMultiplier: 2}, // Parser nodes: UnaryExpr, CompositeLit + {name: "structlit", format: "package main; var x = «struct{x any}{«nil»}»", parseMultiplier: 2}, // Parser nodes: UnaryExpr, CompositeLit + {name: "maplit", format: "package main; var x = «map[int]any{1:«nil»}»", parseMultiplier: 2}, // Parser nodes: CompositeLit, KeyValueExpr + {name: "dot", format: "package main; var x = «x.»x"}, + {name: "index", format: "package main; var x = x«[1]»"}, + {name: "slice", format: "package main; var x = x«[1:2]»"}, + {name: "slice3", format: "package main; var x = x«[1:2:3]»"}, + {name: "dottype", format: "package main; var x = x«.(any)»"}, + {name: "callseq", format: "package main; var x = x«()»"}, + {name: "methseq", format: "package main; var x = x«.m()»", parseMultiplier: 2}, // Parser nodes: SelectorExpr, CallExpr + {name: "binary", format: "package main; var x = «1+»1"}, + {name: "binaryparen", format: "package main; var x = «1+(«1»)»", parseMultiplier: 2}, // Parser nodes: BinaryExpr, ParenExpr + {name: "unary", format: "package main; var x = «^»1"}, + {name: "addr", format: "package main; var x = «& »x"}, + {name: "star", format: "package main; var x = «*»x"}, + {name: "recv", format: "package main; var x = «<-»x"}, + {name: "call", format: "package main; var x = «f(«1»)»", parseMultiplier: 2}, // Parser nodes: Ident, CallExpr + {name: "conv", format: "package main; var x = «(*T)(«1»)»", parseMultiplier: 2}, // Parser nodes: ParenExpr, CallExpr + {name: "label", format: "package main; func main() { «Label:» }"}, + {name: "if", format: "package main; func main() { «if true { «» }»}", parseMultiplier: 2, scope: true, scopeMultiplier: 2}, // Parser nodes: IfStmt, BlockStmt. 
Scopes: IfStmt, BlockStmt + {name: "ifelse", format: "package main; func main() { «if true {} else » {} }", scope: true}, + {name: "switch", format: "package main; func main() { «switch { default: «» }»}", scope: true, scopeMultiplier: 2}, // Scopes: TypeSwitchStmt, CaseClause + {name: "typeswitch", format: "package main; func main() { «switch x.(type) { default: «» }» }", scope: true, scopeMultiplier: 2}, // Scopes: TypeSwitchStmt, CaseClause + {name: "for0", format: "package main; func main() { «for { «» }» }", scope: true, scopeMultiplier: 2}, // Scopes: ForStmt, BlockStmt + {name: "for1", format: "package main; func main() { «for x { «» }» }", scope: true, scopeMultiplier: 2}, // Scopes: ForStmt, BlockStmt + {name: "for3", format: "package main; func main() { «for f(); g(); h() { «» }» }", scope: true, scopeMultiplier: 2}, // Scopes: ForStmt, BlockStmt + {name: "forrange0", format: "package main; func main() { «for range x { «» }» }", scope: true, scopeMultiplier: 2}, // Scopes: RangeStmt, BlockStmt + {name: "forrange1", format: "package main; func main() { «for x = range z { «» }» }", scope: true, scopeMultiplier: 2}, // Scopes: RangeStmt, BlockStmt + {name: "forrange2", format: "package main; func main() { «for x, y = range z { «» }» }", scope: true, scopeMultiplier: 2}, // Scopes: RangeStmt, BlockStmt + {name: "go", format: "package main; func main() { «go func() { «» }()» }", parseMultiplier: 2, scope: true}, // Parser nodes: GoStmt, FuncLit + {name: "defer", format: "package main; func main() { «defer func() { «» }()» }", parseMultiplier: 2, scope: true}, // Parser nodes: DeferStmt, FuncLit + {name: "select", format: "package main; func main() { «select { default: «» }» }", scope: true}, +} + +// split splits pre«mid»post into pre, mid, post. +// If the string does not have that form, split returns x, "", "". +func split(x string) (pre, mid, post string) { + start, end := strings.Index(x, "«"), strings.LastIndex(x, "»") + if start < 0 || end < 0 { + return x, "", "" + } + return x[:start], x[start+len("«") : end], x[end+len("»"):] +} + +func TestParseDepthLimit(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("causes call stack exhaustion on js/wasm") + } + for _, tt := range parseDepthTests { + for _, size := range []string{"small", "big"} { + t.Run(tt.name+"/"+size, func(t *testing.T) { + n := maxNestLev + 1 + if tt.parseMultiplier > 0 { + n /= tt.parseMultiplier + } + if size == "small" { + // Decrease the number of statements by 10, in order to check + // that we do not fail when under the limit. 10 is used to + // provide some wiggle room for cases where the surrounding + // scaffolding syntax adds some noise to the depth that changes + // on a per testcase basis. + n -= 10 + } + + pre, mid, post := split(tt.format) + if strings.Contains(mid, "«") { + left, base, right := split(mid) + mid = strings.Repeat(left, n) + base + strings.Repeat(right, n) + } else { + mid = strings.Repeat(mid, n) + } + input := pre + mid + post + + fset := token.NewFileSet() + _, err := ParseFile(fset, "", input, ParseComments|SkipObjectResolution) + if size == "small" { + if err != nil { + t.Errorf("ParseFile(...): %v (want success)", err) + } + } else { + expected := "exceeded max nesting depth" + if err == nil || !strings.HasSuffix(err.Error(), expected) { + t.Errorf("ParseFile(...) 
= _, %v, want %q", err, expected) + } + } + }) + } + } +} + +func TestScopeDepthLimit(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("causes call stack exhaustion on js/wasm") + } + for _, tt := range parseDepthTests { + if !tt.scope { + continue + } + for _, size := range []string{"small", "big"} { + t.Run(tt.name+"/"+size, func(t *testing.T) { + n := maxScopeDepth + 1 + if tt.scopeMultiplier > 0 { + n /= tt.scopeMultiplier + } + if size == "small" { + // Decrease the number of statements by 10, in order to check + // that we do not fail when under the limit. 10 is used to + // provide some wiggle room for cases where the surrounding + // scaffolding syntax adds some noise to the depth that changes + // on a per testcase basis. + n -= 10 + } + + pre, mid, post := split(tt.format) + if strings.Contains(mid, "«") { + left, base, right := split(mid) + mid = strings.Repeat(left, n) + base + strings.Repeat(right, n) + } else { + mid = strings.Repeat(mid, n) + } + input := pre + mid + post + + fset := token.NewFileSet() + _, err := ParseFile(fset, "", input, DeclarationErrors) + if size == "small" { + if err != nil { + t.Errorf("ParseFile(...): %v (want success)", err) + } + } else { + expected := "exceeded max scope depth during object resolution" + if err == nil || !strings.HasSuffix(err.Error(), expected) { + t.Errorf("ParseFile(...) = _, %v, want %q", err, expected) + } + } + }) + } + } +} diff --git a/src/go/parser/resolver.go b/src/go/parser/resolver.go index 767a5e20ad..f8ff618eba 100644 --- a/src/go/parser/resolver.go +++ b/src/go/parser/resolver.go @@ -54,6 +54,8 @@ func resolveFile(file *ast.File, handle *token.File, declErr func(token.Pos, str file.Unresolved = r.unresolved[0:i] } +const maxScopeDepth int = 1e3 + type resolver struct { handle *token.File declErr func(token.Pos, string) @@ -85,16 +87,19 @@ func (r *resolver) sprintf(format string, args ...any) string { } func (r *resolver) openScope(pos token.Pos) { + r.depth++ + if r.depth > maxScopeDepth { + panic(bailout{pos: pos, msg: "exceeded max scope depth during object resolution"}) + } if debugResolve { r.trace("opening scope @%v", pos) - r.depth++ } r.topScope = ast.NewScope(r.topScope) } func (r *resolver) closeScope() { + r.depth-- if debugResolve { - r.depth-- r.trace("closing scope") } r.topScope = r.topScope.Outer
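
Reviewer note (not part of the change): the program below is a minimal sketch that exercises both new limits from the public go/parser API. The file names and nesting counts are arbitrary values chosen for illustration, and the 1e5/1e3 thresholds are hard-coded in the comments only because maxNestLev and maxScopeDepth are unexported. As with the tests above, the deep-expression case should be avoided on js/wasm, where the stack may still be exhausted before the limit triggers.

package main

import (
	"fmt"
	"go/parser"
	"go/token"
	"strings"
)

func main() {
	fset := token.NewFileSet()

	// An expression nested past the parser limit (maxNestLev = 1e5 in the CL).
	// 200_000 is an arbitrary count comfortably above that limit; the parser
	// now returns an "exceeded max nesting depth" error instead of exhausting
	// the stack.
	deepExpr := "package main\nvar x = " + strings.Repeat("^", 200_000) + "1"
	if _, err := parser.ParseFile(fset, "deep_expr.go", deepExpr, parser.SkipObjectResolution); err != nil {
		fmt.Println(err)
	}

	// Blocks nested past the resolver limit (maxScopeDepth = 1e3 in the CL).
	// Parsing succeeds, but object resolution bails out, and the new pos/msg
	// fields on bailout surface "exceeded max scope depth during object
	// resolution" as an ordinary error.
	deepBlocks := "package main\nfunc main() " + strings.Repeat("{", 2_000) + strings.Repeat("}", 2_000)
	if _, err := parser.ParseFile(fset, "deep_blocks.go", deepBlocks, 0); err != nil {
		fmt.Println(err)
	}
}

The two failures travel slightly different paths: incNestLev records its error with p.error before panicking with an empty bailout, while openScope carries the message inside the bailout itself, which is why the recover blocks in interface.go now check bail.msg before adding to p.errors.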