cmd/compile/internal/syntax: fast Go syntax trees, initial commit.

Syntax tree nodes, scanner, parser, basic printers.

Builds syntax trees for the entire Go std lib at a rate of ~1.8M lines/s
in a warmed-up state (MacMini, 2.3 GHz Intel Core i7, 8GB RAM):

$ go test -run StdLib -fast
parsed 1074617 lines (2832 files) in 579.66364ms (1853863 lines/s)
allocated 282.212Mb (486.854Mb/s)
PASS

Change-Id: Ie26d9a7bf4e5ff07457aedfcc9b89f0eba72ae3f
Reviewed-on: https://go-review.googlesource.com/27195
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
Robert Griesemer 2016-03-04 17:09:08 -08:00 committed by Matthew Dempsky
parent 3b967be421
commit c8683ff797
13 changed files with 5354 additions and 0 deletions
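
For orientation, here is a minimal sketch of how the new package's entry points fit together, based on the ReadFile, Fdump, and Fprint functions added below. It is illustrative only: the package lives under cmd/compile/internal and is therefore not importable by ordinary programs, and the file name passed in is just an example.

package main

// Sketch only: cmd/compile/internal/syntax is an internal package and
// cannot be imported outside cmd/compile; this shows the API shape.
import (
	"fmt"
	"os"

	"cmd/compile/internal/syntax"
)

func main() {
	// Parse one file into a *syntax.File.
	ast, err := syntax.ReadFile("parser.go", nil, 0)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Dump the raw tree structure for debugging ...
	syntax.Fdump(os.Stdout, ast)

	// ... or print it back as Go source, with line breaks enabled.
	syntax.Fprint(os.Stdout, ast, true)
}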

@@ -18,6 +18,10 @@ go src=..
asm
testdata
+
compile
internal
syntax
parser.go
doc
main.go
pkg.go

@@ -0,0 +1,212 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements printing of syntax tree structures.
package syntax
import (
"fmt"
"io"
"reflect"
"unicode"
"unicode/utf8"
)
// Fdump dumps the structure of the syntax tree rooted at n to w.
// It is intended for debugging purposes; no specific output format
// is guaranteed.
func Fdump(w io.Writer, n Node) (err error) {
p := dumper{
output: w,
ptrmap: make(map[Node]int),
last: '\n', // force printing of line number on first line
}
defer func() {
if e := recover(); e != nil {
err = e.(localError).err // re-panics if it's not a localError
}
}()
if n == nil {
p.printf("nil\n")
return
}
p.dump(reflect.ValueOf(n), n)
p.printf("\n")
return
}
type dumper struct {
output io.Writer
ptrmap map[Node]int // node -> dump line number
indent int // current indentation level
last byte // last byte processed by Write
line int // current line number
}
var indentBytes = []byte(". ")
func (p *dumper) Write(data []byte) (n int, err error) {
var m int
for i, b := range data {
// invariant: data[0:n] has been written
if b == '\n' {
m, err = p.output.Write(data[n : i+1])
n += m
if err != nil {
return
}
} else if p.last == '\n' {
p.line++
_, err = fmt.Fprintf(p.output, "%6d ", p.line)
if err != nil {
return
}
for j := p.indent; j > 0; j-- {
_, err = p.output.Write(indentBytes)
if err != nil {
return
}
}
}
p.last = b
}
if len(data) > n {
m, err = p.output.Write(data[n:])
n += m
}
return
}
// localError wraps locally caught errors so we can distinguish
// them from genuine panics which we don't want to return as errors.
type localError struct {
err error
}
// printf is a convenience wrapper that takes care of print errors.
func (p *dumper) printf(format string, args ...interface{}) {
if _, err := fmt.Fprintf(p, format, args...); err != nil {
panic(localError{err})
}
}
// dump prints the contents of x.
// If x is the reflect.Value of a struct s, where &s
// implements Node, then &s should be passed for n -
// this permits printing of the unexported span and
// comments fields of the embedded isNode field by
// calling the Span() and Comment() methods instead of
// using reflection.
func (p *dumper) dump(x reflect.Value, n Node) {
switch x.Kind() {
case reflect.Interface:
if x.IsNil() {
p.printf("nil")
return
}
p.dump(x.Elem(), nil)
case reflect.Ptr:
if x.IsNil() {
p.printf("nil")
return
}
// special cases for identifiers w/o attached comments (common case)
if x, ok := x.Interface().(*Name); ok {
p.printf(x.Value)
return
}
p.printf("*")
// Fields may share type expressions, and declarations
// may share the same group - use ptrmap to keep track
// of nodes that have been printed already.
if ptr, ok := x.Interface().(Node); ok {
if line, exists := p.ptrmap[ptr]; exists {
p.printf("(Node @ %d)", line)
return
}
p.ptrmap[ptr] = p.line
n = ptr
}
p.dump(x.Elem(), n)
case reflect.Slice:
if x.IsNil() {
p.printf("nil")
return
}
p.printf("%s (%d entries) {", x.Type(), x.Len())
if x.Len() > 0 {
p.indent++
p.printf("\n")
for i, n := 0, x.Len(); i < n; i++ {
p.printf("%d: ", i)
p.dump(x.Index(i), nil)
p.printf("\n")
}
p.indent--
}
p.printf("}")
case reflect.Struct:
typ := x.Type()
// if span, ok := x.Interface().(lexical.Span); ok {
// p.printf("%s", &span)
// return
// }
p.printf("%s {", typ)
p.indent++
first := true
if n != nil {
p.printf("\n")
first = false
// p.printf("Span: %s\n", n.Span())
// if c := *n.Comments(); c != nil {
// p.printf("Comments: ")
// p.dump(reflect.ValueOf(c), nil) // a Comment is not a Node
// p.printf("\n")
// }
}
for i, n := 0, typ.NumField(); i < n; i++ {
// Exclude non-exported fields because their
// values cannot be accessed via reflection.
if name := typ.Field(i).Name; isExported(name) {
if first {
p.printf("\n")
first = false
}
p.printf("%s: ", name)
p.dump(x.Field(i), nil)
p.printf("\n")
}
}
p.indent--
p.printf("}")
default:
switch x := x.Interface().(type) {
case string:
// print strings in quotes
p.printf("%q", x)
default:
p.printf("%v", x)
}
}
}
func isExported(name string) bool {
ch, _ := utf8.DecodeRuneInString(name)
return unicode.IsUpper(ch)
}
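
The localError/recover pairing in Fdump above is a self-contained pattern for propagating write errors out of deeply nested printing code without threading an err value through every helper. A standalone sketch of the same idea (localErr, process, and step are illustrative names, not part of this commit):

package main

import (
	"errors"
	"fmt"
)

// localErr wraps locally raised errors so recover can tell them apart
// from genuine panics, mirroring dump.go's localError.
type localErr struct{ err error }

func process(fail bool) (err error) {
	defer func() {
		if e := recover(); e != nil {
			err = e.(localErr).err // re-panics if it's not a localErr
		}
	}()
	step(fail) // may bail out via panic(localErr{...})
	return nil
}

func step(fail bool) {
	if fail {
		panic(localErr{errors.New("write failed")})
	}
}

func main() {
	fmt.Println(process(true))  // write failed
	fmt.Println(process(false)) // <nil>
}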

@@ -0,0 +1,22 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"os"
"testing"
)
func TestDump(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
ast, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}
Fdump(os.Stdout, ast)
}

@@ -0,0 +1,425 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
// ----------------------------------------------------------------------------
// Nodes
type Node interface {
aNode()
}
type node struct {
doc *Comment // nil means no comment(s) attached
pos uint32
line uint32
}
func (*node) aNode() {}
func (n *node) init(p *parser) {
n.pos = uint32(p.pos)
n.line = uint32(p.line)
}
// ----------------------------------------------------------------------------
// Files
type File struct {
PkgName *Name
DeclList []Decl
Pragmas []Pragma
Lines int
node
}
type Pragma struct {
Line int
Text string
}
// ----------------------------------------------------------------------------
// Declarations
type (
Decl interface {
Node
aDecl()
}
ImportDecl struct {
LocalPkgName *Name // including "."; nil means no rename present
Path *BasicLit
Group *Group // nil means not part of a group
decl
}
ConstDecl struct {
NameList []*Name
Type Expr // nil means no type
Values Expr // nil means no values
Group *Group // nil means not part of a group
decl
}
TypeDecl struct {
Name *Name
Type Expr
Group *Group // nil means not part of a group
decl
}
VarDecl struct {
NameList []*Name
Type Expr // nil means no type
Values Expr // nil means no values
Group *Group // nil means not part of a group
decl
}
FuncDecl struct {
Attr map[string]bool // go:attr map
Recv *Field // nil means regular function
Name *Name
Type *FuncType
Body []Stmt // nil means no body (forward declaration)
decl
}
)
type decl struct{ node }
func (*decl) aDecl() {}
// All declarations belonging to the same group point to the same Group node.
type Group struct {
dummy int // not empty so we are guaranteed different Group instances
}
// ----------------------------------------------------------------------------
// Expressions
type (
Expr interface {
Node
aExpr()
}
// Value
Name struct {
Value string
expr
}
// Value
BasicLit struct {
Value string
Kind LitKind
expr
}
// Type { ElemList[0], ElemList[1], ... }
CompositeLit struct {
Type Expr // nil means no literal type
ElemList []Expr
NKeys int // number of elements with keys
expr
}
// Key: Value
KeyValueExpr struct {
Key, Value Expr
expr
}
// func Type { Body }
FuncLit struct {
Type *FuncType
Body []Stmt
expr
}
// (X)
ParenExpr struct {
X Expr
expr
}
// X.Sel
SelectorExpr struct {
X Expr
Sel *Name
expr
}
// X[Index]
IndexExpr struct {
X Expr
Index Expr
expr
}
// X[Index[0] : Index[1] : Index[2]]
SliceExpr struct {
X Expr
Index [3]Expr
expr
}
// X.(Type)
AssertExpr struct {
X Expr
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Type Expr
expr
}
Operation struct {
Op Operator
X, Y Expr // Y == nil means unary expression
expr
}
// Fun(ArgList[0], ArgList[1], ...)
CallExpr struct {
Fun Expr
ArgList []Expr
HasDots bool // last argument is followed by ...
expr
}
// ElemList[0], ElemList[1], ...
ListExpr struct {
ElemList []Expr
expr
}
// [Len]Elem
ArrayType struct {
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Len Expr // nil means Len is ...
Elem Expr
expr
}
// []Elem
SliceType struct {
Elem Expr
expr
}
// ...Elem
DotsType struct {
Elem Expr
expr
}
// struct { FieldList[0] TagList[0]; FieldList[1] TagList[1]; ... }
StructType struct {
FieldList []*Field
TagList []*BasicLit // i >= len(TagList) || TagList[i] == nil means no tag for field i
expr
}
// Name Type
// Type
Field struct {
Name *Name // nil means anonymous field/parameter (structs/parameters), or embedded interface (interfaces)
Type Expr // field names declared in a list share the same Type (identical pointers)
node
}
// interface { MethodList[0]; MethodList[1]; ... }
InterfaceType struct {
MethodList []*Field
expr
}
FuncType struct {
ParamList []*Field
ResultList []*Field
expr
}
// map[Key]Value
MapType struct {
Key Expr
Value Expr
expr
}
// chan Elem
// <-chan Elem
// chan<- Elem
ChanType struct {
Dir ChanDir // 0 means no direction
Elem Expr
expr
}
)
type expr struct{ node }
func (*expr) aExpr() {}
type ChanDir uint
const (
_ ChanDir = iota
SendOnly
RecvOnly
)
// ----------------------------------------------------------------------------
// Statements
type (
Stmt interface {
Node
aStmt()
}
SimpleStmt interface {
Stmt
aSimpleStmt()
}
EmptyStmt struct {
simpleStmt
}
LabeledStmt struct {
Label *Name
Stmt Stmt
stmt
}
BlockStmt struct {
Body []Stmt
stmt
}
ExprStmt struct {
X Expr
simpleStmt
}
SendStmt struct {
Chan, Value Expr // Chan <- Value
simpleStmt
}
DeclStmt struct {
DeclList []Decl
stmt
}
AssignStmt struct {
Op Operator // 0 means no operation
Lhs, Rhs Expr // Rhs == ImplicitOne means Lhs++ (Op == Add) or Lhs-- (Op == Sub)
simpleStmt
}
BranchStmt struct {
Tok token // Break, Continue, Fallthrough, or Goto
Label *Name
stmt
}
CallStmt struct {
Tok token // Go or Defer
Call *CallExpr
stmt
}
ReturnStmt struct {
Results Expr // nil means no explicit return values
stmt
}
IfStmt struct {
Init SimpleStmt
Cond Expr
Then []Stmt
Else Stmt // either *IfStmt or *BlockStmt
stmt
}
ForStmt struct {
Init SimpleStmt // incl. *RangeClause
Cond Expr
Post SimpleStmt
Body []Stmt
stmt
}
SwitchStmt struct {
Init SimpleStmt
Tag Expr
Body []*CaseClause
stmt
}
SelectStmt struct {
Body []*CommClause
stmt
}
)
type (
RangeClause struct {
Lhs Expr // nil means no Lhs = or Lhs :=
Def bool // means :=
X Expr // range X
simpleStmt
}
TypeSwitchGuard struct {
// TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments)
Lhs *Name // nil means no Lhs :=
X Expr // X.(type)
expr
}
CaseClause struct {
Cases Expr // nil means default clause
Body []Stmt
node
}
CommClause struct {
Comm SimpleStmt // send or receive stmt; nil means default clause
Body []Stmt
node
}
)
type stmt struct{ node }
func (stmt) aStmt() {}
type simpleStmt struct {
stmt
}
func (simpleStmt) aSimpleStmt() {}
// ----------------------------------------------------------------------------
// Comments
type CommentKind uint
const (
Above CommentKind = iota
Below
Left
Right
)
type Comment struct {
Kind CommentKind
Text string
Next *Comment
}
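
Because every concrete node embeds one of the unexported node/decl/expr structs, trees can also be assembled by hand with keyed composite literals, leaving the embedded fields at their zero values. A minimal sketch, written as if it lived inside the syntax package and using String from printer.go below (ExampleManualTree is a hypothetical name):

// In package syntax (e.g. an example_test.go file); imports "fmt".
func ExampleManualTree() {
	f := &File{
		PkgName: &Name{Value: "p"},
		DeclList: []Decl{
			&VarDecl{
				NameList: []*Name{{Value: "x"}},
				Type:     &Name{Value: "int"},
			},
		},
	}
	// String renders the tree without line breaks;
	// expected output is roughly: package p; var x int
	fmt.Println(String(f))
}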

File diff suppressed because it is too large

@@ -0,0 +1,157 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"path/filepath"
"runtime"
"strings"
"sync"
"testing"
"time"
)
var fast = flag.Bool("fast", false, "parse package files in parallel")
var src = flag.String("src", "parser.go", "source file to parse")
var verify = flag.Bool("verify", false, "verify idempotent printing")
func TestParse(t *testing.T) {
_, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}
}
func TestStdLib(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
var m1 runtime.MemStats
runtime.ReadMemStats(&m1)
start := time.Now()
type parseResult struct {
filename string
lines int
}
results := make(chan parseResult)
go func() {
for _, dir := range []string{
runtime.GOROOT(),
//"/Users/gri/src",
} {
walkDirs(t, dir, func(filename string) {
if debug {
fmt.Printf("parsing %s\n", filename)
}
ast, err := ReadFile(filename, nil, 0)
if err != nil {
t.Fatal(err)
}
if *verify {
verifyPrint(filename, ast)
}
results <- parseResult{filename, ast.Lines}
})
}
close(results)
}()
var count, lines int
for res := range results {
count++
lines += res.lines
if testing.Verbose() {
fmt.Printf("%5d %s (%d lines)\n", count, res.filename, res.lines)
}
}
dt := time.Since(start)
var m2 runtime.MemStats
runtime.ReadMemStats(&m2)
dm := float64(m2.TotalAlloc-m1.TotalAlloc) / 1e6
fmt.Printf("parsed %d lines (%d files) in %v (%d lines/s)\n", lines, count, dt, int64(float64(lines)/dt.Seconds()))
fmt.Printf("allocated %.3fMb (%.3fMb/s)\n", dm, dm/dt.Seconds())
}
func walkDirs(t *testing.T, dir string, action func(string)) {
fis, err := ioutil.ReadDir(dir)
if err != nil {
t.Error(err)
return
}
var files, dirs []string
for _, fi := range fis {
if fi.Mode().IsRegular() {
if strings.HasSuffix(fi.Name(), ".go") {
path := filepath.Join(dir, fi.Name())
files = append(files, path)
}
} else if fi.IsDir() && fi.Name() != "testdata" {
path := filepath.Join(dir, fi.Name())
if !strings.Contains(path, "go/test") {
dirs = append(dirs, path)
}
}
}
if *fast {
var wg sync.WaitGroup
wg.Add(len(files))
for _, filename := range files {
go func(filename string) {
defer wg.Done()
action(filename)
}(filename)
}
wg.Wait()
} else {
for _, filename := range files {
action(filename)
}
}
for _, dir := range dirs {
walkDirs(t, dir, action)
}
}
func verifyPrint(filename string, ast1 *File) {
var buf1 bytes.Buffer
_, err := Fprint(&buf1, ast1, true)
if err != nil {
panic(err)
}
ast2, err := ReadBytes(buf1.Bytes(), nil, 0)
if err != nil {
panic(err)
}
var buf2 bytes.Buffer
_, err = Fprint(&buf2, ast2, true)
if err != nil {
panic(err)
}
if bytes.Compare(buf1.Bytes(), buf2.Bytes()) != 0 {
fmt.Printf("--- %s ---\n", filename)
fmt.Printf("%s\n", buf1.Bytes())
fmt.Println()
fmt.Printf("--- %s ---\n", filename)
fmt.Printf("%s\n", buf2.Bytes())
fmt.Println()
panic("not equal")
}
}

@@ -0,0 +1,942 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements printing of syntax trees in source format.
package syntax
import (
"bytes"
"fmt"
"io"
"strings"
)
// TODO(gri) Consider removing the linebreaks flag from this signature.
// It's likely rarely used in common cases.
func Fprint(w io.Writer, x Node, linebreaks bool) (n int, err error) {
p := printer{
output: w,
linebreaks: linebreaks,
}
defer func() {
n = p.written
if e := recover(); e != nil {
err = e.(localError).err // re-panics if it's not a localError
}
}()
p.print(x)
p.flush(_EOF)
return
}
func String(n Node) string {
var buf bytes.Buffer
_, err := Fprint(&buf, n, false)
if err != nil {
panic(err) // TODO(gri) print something sensible into buf instead
}
return buf.String()
}
type ctrlSymbol int
const (
none ctrlSymbol = iota
semi
blank
newline
indent
outdent
// comment
// eolComment
)
type whitespace struct {
last token
kind ctrlSymbol
//text string // comment text (possibly ""); valid if kind == comment
}
type printer struct {
output io.Writer
written int // number of bytes written
linebreaks bool // print linebreaks instead of semis
indent int // current indentation level
nlcount int // number of consecutive newlines
pending []whitespace // pending whitespace
lastTok token // last token (after any pending semi) processed by print
}
// write is a thin wrapper around p.output.Write
// that takes care of accounting and error handling.
func (p *printer) write(data []byte) {
n, err := p.output.Write(data)
p.written += n
if err != nil {
panic(localError{err})
}
}
var (
tabBytes = []byte("\t\t\t\t\t\t\t\t")
newlineByte = []byte("\n")
blankByte = []byte(" ")
)
func (p *printer) writeBytes(data []byte) {
if len(data) == 0 {
panic("expected non-empty []byte")
}
if p.nlcount > 0 && p.indent > 0 {
// write indentation
n := p.indent
for n > len(tabBytes) {
p.write(tabBytes)
n -= len(tabBytes)
}
p.write(tabBytes[:n])
}
p.write(data)
p.nlcount = 0
}
func (p *printer) writeString(s string) {
p.writeBytes([]byte(s))
}
// If impliesSemi returns true for a non-blank line's final token tok,
// a semicolon is automatically inserted. Vice versa, a semicolon may
// be omitted in those cases.
func impliesSemi(tok token) bool {
switch tok {
case _Name,
_Break, _Continue, _Fallthrough, _Return,
/*_Inc, _Dec,*/ _Rparen, _Rbrack, _Rbrace: // TODO(gri) fix this
return true
}
return false
}
// TODO(gri) provide table of []byte values for all tokens to avoid repeated string conversion
func lineComment(text string) bool {
return strings.HasPrefix(text, "//")
}
func (p *printer) addWhitespace(kind ctrlSymbol, text string) {
p.pending = append(p.pending, whitespace{p.lastTok, kind /*text*/})
switch kind {
case semi:
p.lastTok = _Semi
case newline:
p.lastTok = 0
// TODO(gri) do we need to handle /*-style comments containing newlines here?
}
}
func (p *printer) flush(next token) {
// eliminate semis and redundant whitespace
sawNewline := next == _EOF
sawParen := next == _Rparen || next == _Rbrace
for i := len(p.pending) - 1; i >= 0; i-- {
switch p.pending[i].kind {
case semi:
k := semi
if sawParen {
sawParen = false
k = none // eliminate semi
} else if sawNewline && impliesSemi(p.pending[i].last) {
sawNewline = false
k = none // eliminate semi
}
p.pending[i].kind = k
case newline:
sawNewline = true
case blank, indent, outdent:
// nothing to do
// case comment:
// // A multi-line comment acts like a newline; and a ""
// // comment implies by definition at least one newline.
// if text := p.pending[i].text; strings.HasPrefix(text, "/*") && strings.ContainsRune(text, '\n') {
// sawNewline = true
// }
// case eolComment:
// // TODO(gri) act depending on sawNewline
default:
panic("unreachable")
}
}
// print pending
prev := none
for i := range p.pending {
switch p.pending[i].kind {
case none:
// nothing to do
case semi:
p.writeString(";")
p.nlcount = 0
prev = semi
case blank:
if prev != blank {
// at most one blank
p.writeBytes(blankByte)
p.nlcount = 0
prev = blank
}
case newline:
const maxEmptyLines = 1
if p.nlcount <= maxEmptyLines {
p.write(newlineByte)
p.nlcount++
prev = newline
}
case indent:
p.indent++
case outdent:
p.indent--
if p.indent < 0 {
panic("negative indentation")
}
// case comment:
// if text := p.pending[i].text; text != "" {
// p.writeString(text)
// p.nlcount = 0
// prev = comment
// }
// // TODO(gri) should check that line comments are always followed by newline
default:
panic("unreachable")
}
}
p.pending = p.pending[:0] // re-use underlying array
}
func mayCombine(prev token, next byte) (b bool) {
return // for now
// switch prev {
// case lexical.Int:
// b = next == '.' // 1.
// case lexical.Add:
// b = next == '+' // ++
// case lexical.Sub:
// b = next == '-' // --
// case lexical.Quo:
// b = next == '*' // /*
// case lexical.Lss:
// b = next == '-' || next == '<' // <- or <<
// case lexical.And:
// b = next == '&' || next == '^' // && or &^
// }
// return
}
func (p *printer) print(args ...interface{}) {
for i := 0; i < len(args); i++ {
switch x := args[i].(type) {
case nil:
// we should not reach here but don't crash
case Node:
p.printNode(x)
case token:
// _Name implies an immediately following string
// argument which is the actual value to print.
var s string
if x == _Name {
i++
if i >= len(args) {
panic("missing string argument after _Name")
}
s = args[i].(string)
} else {
s = x.String()
}
// TODO(gri) This check seems at the wrong place since it doesn't
// take into account pending white space.
if mayCombine(p.lastTok, s[0]) {
panic("adjacent tokens combine without whitespace")
}
if x == _Semi {
// delay printing of semi
p.addWhitespace(semi, "")
} else {
p.flush(x)
p.writeString(s)
p.nlcount = 0
p.lastTok = x
}
case Operator:
if x != 0 {
p.flush(_Operator)
p.writeString(x.String())
}
case ctrlSymbol:
switch x {
case none, semi /*, comment*/ :
panic("unreachable")
case newline:
// TODO(gri) need to handle mandatory newlines after a //-style comment
if !p.linebreaks {
x = blank
}
}
p.addWhitespace(x, "")
// case *Comment: // comments are not Nodes
// p.addWhitespace(comment, x.Text)
default:
panic(fmt.Sprintf("unexpected argument %v (%T)", x, x))
}
}
}
func (p *printer) printNode(n Node) {
// ncom := *n.Comments()
// if ncom != nil {
// // TODO(gri) in general we cannot make assumptions about whether
// // a comment is a /*- or a //-style comment since the syntax
// // tree may have been manipulated. Need to make sure the correct
// // whitespace is emitted.
// for _, c := range ncom.Alone {
// p.print(c, newline)
// }
// for _, c := range ncom.Before {
// if c.Text == "" || lineComment(c.Text) {
// panic("unexpected empty line or //-style 'before' comment")
// }
// p.print(c, blank)
// }
// }
p.printRawNode(n)
// if ncom != nil && len(ncom.After) > 0 {
// for i, c := range ncom.After {
// if i+1 < len(ncom.After) {
// if c.Text == "" || lineComment(c.Text) {
// panic("unexpected empty line or //-style non-final 'after' comment")
// }
// }
// p.print(blank, c)
// }
// //p.print(newline)
// }
}
func (p *printer) printRawNode(n Node) {
switch n := n.(type) {
// expressions and types
case *Name:
p.print(_Name, n.Value) // _Name requires actual value following immediately
case *BasicLit:
p.print(_Name, n.Value) // _Name requires actual value following immediately
case *FuncLit:
p.print(n.Type, blank)
p.printBody(n.Body)
case *CompositeLit:
if n.Type != nil {
p.print(n.Type)
}
p.print(_Lbrace)
if n.NKeys > 0 && n.NKeys == len(n.ElemList) {
p.printExprLines(n.ElemList)
} else {
p.printExprList(n.ElemList)
}
p.print(_Rbrace)
case *ParenExpr:
p.print(_Lparen, n.X, _Rparen)
case *SelectorExpr:
p.print(n.X, _Dot, n.Sel)
case *IndexExpr:
p.print(n.X, _Lbrack, n.Index, _Rbrack)
case *SliceExpr:
p.print(n.X, _Lbrack)
if i := n.Index[0]; i != nil {
p.printNode(i)
}
p.print(_Colon)
if j := n.Index[1]; j != nil {
p.printNode(j)
}
if k := n.Index[2]; k != nil {
p.print(_Colon, k)
}
p.print(_Rbrack)
case *AssertExpr:
p.print(n.X, _Dot, _Lparen)
if n.Type != nil {
p.printNode(n.Type)
} else {
p.print(_Type)
}
p.print(_Rparen)
case *CallExpr:
p.print(n.Fun, _Lparen)
p.printExprList(n.ArgList)
if n.HasDots {
p.print(_DotDotDot)
}
p.print(_Rparen)
case *Operation:
if n.Y == nil {
// unary expr
p.print(n.Op)
// if n.Op == lexical.Range {
// p.print(blank)
// }
p.print(n.X)
} else {
// binary expr
// TODO(gri) eventually take precedence into account
// to control possibly missing parentheses
p.print(n.X, blank, n.Op, blank, n.Y)
}
case *KeyValueExpr:
p.print(n.Key, _Colon, blank, n.Value)
case *ListExpr:
p.printExprList(n.ElemList)
case *ArrayType:
var len interface{} = _DotDotDot
if n.Len != nil {
len = n.Len
}
p.print(_Lbrack, len, _Rbrack, n.Elem)
case *SliceType:
p.print(_Lbrack, _Rbrack, n.Elem)
case *DotsType:
p.print(_DotDotDot, n.Elem)
case *StructType:
p.print(_Struct)
if len(n.FieldList) > 0 && p.linebreaks {
p.print(blank)
}
p.print(_Lbrace)
if len(n.FieldList) > 0 {
p.print(newline, indent)
p.printFieldList(n.FieldList, n.TagList)
p.print(outdent, newline)
}
p.print(_Rbrace)
case *FuncType:
p.print(_Func)
p.printSignature(n)
case *InterfaceType:
p.print(_Interface)
if len(n.MethodList) > 0 && p.linebreaks {
p.print(blank)
}
p.print(_Lbrace)
if len(n.MethodList) > 0 {
p.print(newline, indent)
p.printMethodList(n.MethodList)
p.print(outdent, newline)
}
p.print(_Rbrace)
case *MapType:
p.print(_Map, _Lbrack, n.Key, _Rbrack, n.Value)
case *ChanType:
if n.Dir == RecvOnly {
p.print(_Arrow)
}
p.print(_Chan)
if n.Dir == SendOnly {
p.print(_Arrow)
}
p.print(blank, n.Elem)
// statements
case *DeclStmt:
p.printDecl(n.DeclList)
case *EmptyStmt:
// nothing to print
case *LabeledStmt:
p.print(outdent, n.Label, _Colon, indent, newline, n.Stmt)
case *ExprStmt:
p.print(n.X)
case *SendStmt:
p.print(n.Chan, blank, _Arrow, blank, n.Value)
case *AssignStmt:
p.print(n.Lhs)
if n.Rhs == ImplicitOne {
// TODO(gri) This is going to break the mayCombine
// check once we enable that again.
p.print(n.Op, n.Op) // ++ or --
} else {
p.print(blank, n.Op, _Assign, blank)
p.print(n.Rhs)
}
case *CallStmt:
p.print(n.Tok, blank, n.Call)
case *ReturnStmt:
p.print(_Return)
if n.Results != nil {
p.print(blank, n.Results)
}
case *BranchStmt:
p.print(n.Tok)
if n.Label != nil {
p.print(blank, n.Label)
}
case *BlockStmt:
p.printBody(n.Body)
case *IfStmt:
p.print(_If, blank)
if n.Init != nil {
p.print(n.Init, _Semi, blank)
}
p.print(n.Cond, blank)
p.printBody(n.Then)
if n.Else != nil {
p.print(blank, _Else, blank, n.Else)
}
case *SwitchStmt:
p.print(_Switch, blank)
if n.Init != nil {
p.print(n.Init, _Semi, blank)
}
if n.Tag != nil {
p.print(n.Tag, blank)
}
p.printSwitchBody(n.Body)
case *TypeSwitchGuard:
if n.Lhs != nil {
p.print(n.Lhs, blank, _Define, blank)
}
p.print(n.X, _Dot, _Lparen, _Type, _Rparen)
case *SelectStmt:
p.print(_Select, blank) // for now
p.printSelectBody(n.Body)
case *RangeClause:
if n.Lhs != nil {
tok := _Assign
if n.Def {
tok = _Define
}
p.print(n.Lhs, blank, tok, blank)
}
p.print(_Range, blank, n.X)
case *ForStmt:
p.print(_For, blank)
if n.Init == nil && n.Post == nil {
if n.Cond != nil {
p.print(n.Cond, blank)
}
} else {
if n.Init != nil {
p.print(n.Init)
// TODO(gri) clean this up
if _, ok := n.Init.(*RangeClause); ok {
p.print(blank)
p.printBody(n.Body)
break
}
}
p.print(_Semi, blank)
if n.Cond != nil {
p.print(n.Cond)
}
p.print(_Semi, blank)
if n.Post != nil {
p.print(n.Post, blank)
}
}
p.printBody(n.Body)
case *ImportDecl:
if n.Group == nil {
p.print(_Import, blank)
}
if n.LocalPkgName != nil {
p.print(n.LocalPkgName, blank)
}
p.print(n.Path)
case *ConstDecl:
if n.Group == nil {
p.print(_Const, blank)
}
p.printNameList(n.NameList)
if n.Type != nil {
p.print(blank, n.Type)
}
if n.Values != nil {
p.print(blank, _Assign, blank, n.Values)
}
case *TypeDecl:
if n.Group == nil {
p.print(_Type, blank)
}
p.print(n.Name, blank, n.Type)
case *VarDecl:
if n.Group == nil {
p.print(_Var, blank)
}
p.printNameList(n.NameList)
if n.Type != nil {
p.print(blank, n.Type)
}
if n.Values != nil {
p.print(blank, _Assign, blank, n.Values)
}
case *FuncDecl:
p.print(_Func, blank)
if r := n.Recv; r != nil {
p.print(_Lparen)
if r.Name != nil {
p.print(r.Name, blank)
}
p.printNode(r.Type)
p.print(_Rparen, blank)
}
p.print(n.Name)
p.printSignature(n.Type)
if n.Body != nil {
p.print(blank)
p.printBody(n.Body)
}
case *printGroup:
p.print(n.Tok, blank, _Lparen)
if len(n.Decls) > 0 {
p.print(newline, indent)
for _, d := range n.Decls {
p.printNode(d)
p.print(_Semi, newline)
}
p.print(outdent)
}
p.print(_Rparen)
// files
case *File:
p.print(_Package, blank, n.PkgName)
if len(n.DeclList) > 0 {
p.print(_Semi, newline, newline)
p.printDeclList(n.DeclList)
}
default:
panic(fmt.Sprintf("syntax.Iterate: unexpected node type %T", n))
}
}
func (p *printer) printFields(fields []*Field, tags []*BasicLit, i, j int) {
if i+1 == j && fields[i].Name == nil {
// anonymous field
p.printNode(fields[i].Type)
} else {
for k, f := range fields[i:j] {
if k > 0 {
p.print(_Comma, blank)
}
p.printNode(f.Name)
}
p.print(blank)
p.printNode(fields[i].Type)
}
if i < len(tags) && tags[i] != nil {
p.print(blank)
p.printNode(tags[i])
}
}
func (p *printer) printFieldList(fields []*Field, tags []*BasicLit) {
i0 := 0
var typ Expr
for i, f := range fields {
if f.Name == nil || f.Type != typ {
if i0 < i {
p.printFields(fields, tags, i0, i)
p.print(_Semi, newline)
i0 = i
}
typ = f.Type
}
}
p.printFields(fields, tags, i0, len(fields))
}
func (p *printer) printMethodList(methods []*Field) {
for i, m := range methods {
if i > 0 {
p.print(_Semi, newline)
}
if m.Name != nil {
p.printNode(m.Name)
p.printSignature(m.Type.(*FuncType))
} else {
p.printNode(m.Type)
}
}
}
func (p *printer) printNameList(list []*Name) {
for i, x := range list {
if i > 0 {
p.print(_Comma, blank)
}
p.printNode(x)
}
}
func (p *printer) printExprList(list []Expr) {
for i, x := range list {
if i > 0 {
p.print(_Comma, blank)
}
p.printNode(x)
}
}
func (p *printer) printExprLines(list []Expr) {
if len(list) > 0 {
p.print(newline, indent)
for _, x := range list {
p.print(x, _Comma, newline)
}
p.print(outdent)
}
}
func groupFor(d Decl) (token, *Group) {
switch d := d.(type) {
case *ImportDecl:
return _Import, d.Group
case *ConstDecl:
return _Const, d.Group
case *TypeDecl:
return _Type, d.Group
case *VarDecl:
return _Var, d.Group
case *FuncDecl:
return _Func, nil
default:
panic("unreachable")
}
}
type printGroup struct {
node
Tok token
Decls []Decl
}
func (p *printer) printDecl(list []Decl) {
tok, group := groupFor(list[0])
if group == nil {
if len(list) != 1 {
panic("unreachable")
}
p.printNode(list[0])
return
}
// if _, ok := list[0].(*EmptyDecl); ok {
// if len(list) != 1 {
// panic("unreachable")
// }
// // TODO(gri) if there are comments inside the empty
// // group, we may need to keep the list non-nil
// list = nil
// }
// printGroup is here for consistent comment handling
// (this is not yet used)
var pg printGroup
// *pg.Comments() = *group.Comments()
pg.Tok = tok
pg.Decls = list
p.printNode(&pg)
}
func (p *printer) printDeclList(list []Decl) {
i0 := 0
var tok token
var group *Group
for i, x := range list {
if s, g := groupFor(x); g == nil || g != group {
if i0 < i {
p.printDecl(list[i0:i])
p.print(_Semi, newline)
// print empty line between different declaration groups,
// different kinds of declarations, or between functions
if g != group || s != tok || s == _Func {
p.print(newline)
}
i0 = i
}
tok, group = s, g
}
}
p.printDecl(list[i0:])
}
func (p *printer) printSignature(sig *FuncType) {
p.printParameterList(sig.ParamList)
if list := sig.ResultList; list != nil {
p.print(blank)
if len(list) == 1 && list[0].Name == nil {
p.printNode(list[0].Type)
} else {
p.printParameterList(list)
}
}
}
func (p *printer) printParameterList(list []*Field) {
p.print(_Lparen)
if len(list) > 0 {
for i, f := range list {
if i > 0 {
p.print(_Comma, blank)
}
if f.Name != nil {
p.printNode(f.Name)
if i+1 < len(list) {
f1 := list[i+1]
if f1.Name != nil && f1.Type == f.Type {
continue // no need to print type
}
}
p.print(blank)
}
p.printNode(f.Type)
}
}
p.print(_Rparen)
}
func (p *printer) printStmtList(list []Stmt, braces bool) {
for i, x := range list {
p.print(x, _Semi)
if i+1 < len(list) {
p.print(newline)
} else if braces {
// Print an extra semicolon if the last statement is
// an empty statement and we are in a braced block
// because one semicolon is automatically removed.
if _, ok := x.(*EmptyStmt); ok {
p.print(x, _Semi)
}
}
}
}
func (p *printer) printBody(list []Stmt) {
p.print(_Lbrace)
if len(list) > 0 {
p.print(newline, indent)
p.printStmtList(list, true)
p.print(outdent, newline)
}
p.print(_Rbrace)
}
func (p *printer) printSwitchBody(list []*CaseClause) {
p.print(_Lbrace)
if len(list) > 0 {
p.print(newline)
for i, c := range list {
p.printCaseClause(c, i+1 == len(list))
p.print(newline)
}
}
p.print(_Rbrace)
}
func (p *printer) printSelectBody(list []*CommClause) {
p.print(_Lbrace)
if len(list) > 0 {
p.print(newline)
for i, c := range list {
p.printCommClause(c, i+1 == len(list))
p.print(newline)
}
}
p.print(_Rbrace)
}
func (p *printer) printCaseClause(c *CaseClause, braces bool) {
if c.Cases != nil {
p.print(_Case, blank, c.Cases)
} else {
p.print(_Default)
}
p.print(_Colon)
if len(c.Body) > 0 {
p.print(newline, indent)
p.printStmtList(c.Body, braces)
p.print(outdent)
}
}
func (p *printer) printCommClause(c *CommClause, braces bool) {
if c.Comm != nil {
p.print(_Case, blank)
p.print(c.Comm)
} else {
p.print(_Default)
}
p.print(_Colon)
if len(c.Body) > 0 {
p.print(newline, indent)
p.printStmtList(c.Body, braces)
p.print(outdent)
}
}
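
The shared *Group pointer from nodes.go is what drives printDeclList's choice between a single declaration and a factored block: a run of declarations pointing at the same Group is handed to printGroup as one unit. A hedged in-package sketch (ExampleGroupedVars is a hypothetical name; the expected output is an approximation):

// In package syntax; imports "os".
func ExampleGroupedVars() {
	g := new(Group) // declarations sharing g print as one factored block
	f := &File{
		PkgName: &Name{Value: "p"},
		DeclList: []Decl{
			&VarDecl{NameList: []*Name{{Value: "x"}}, Type: &Name{Value: "int"}, Group: g},
			&VarDecl{NameList: []*Name{{Value: "y"}}, Type: &Name{Value: "string"}, Group: g},
		},
	}
	Fprint(os.Stdout, f, true) // linebreaks enabled
	// Expected shape, roughly:
	//
	// package p
	//
	// var (
	// 	x int
	// 	y string
	// )
}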

@@ -0,0 +1,24 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"os"
"testing"
)
func TestPrint(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
ast, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}
Fprint(os.Stdout, ast, true)
fmt.Println()
}

@@ -0,0 +1,651 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"io"
"strings"
"unicode"
"unicode/utf8"
)
type scanner struct {
source
nlsemi bool // if set '\n' and EOF translate to ';'
// current token, valid after calling next()
pos, line int
tok token
lit string // valid if tok is _Name or _Literal
kind LitKind // valid if tok is _Literal
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
pragmas []Pragma
}
func (s *scanner) init(src io.Reader, errh ErrorHandler) {
s.source.init(src, errh)
s.nlsemi = false
}
func (s *scanner) next() {
nlsemi := s.nlsemi
s.nlsemi = false
redo:
// skip white space
c := s.getr()
for c == ' ' || c == '\t' || c == '\n' && !nlsemi || c == '\r' {
c = s.getr()
}
// token start
s.pos, s.line = s.source.pos0(), s.source.line0
if isLetter(c) || c >= utf8.RuneSelf && unicode.IsLetter(c) {
s.ident()
return
}
switch c {
case -1:
if nlsemi {
s.tok = _Semi
break
}
s.tok = _EOF
case '\n':
s.tok = _Semi
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
s.number(c)
case '"':
s.stdString()
case '`':
s.rawString()
case '\'':
s.rune()
case '(':
s.tok = _Lparen
case '[':
s.tok = _Lbrack
case '{':
s.tok = _Lbrace
case ',':
s.tok = _Comma
case ';':
s.tok = _Semi
case ')':
s.nlsemi = true
s.tok = _Rparen
case ']':
s.nlsemi = true
s.tok = _Rbrack
case '}':
s.nlsemi = true
s.tok = _Rbrace
case ':':
if s.getr() == '=' {
s.tok = _Define
break
}
s.ungetr()
s.tok = _Colon
case '.':
c = s.getr()
if isDigit(c) {
s.ungetr()
s.source.r0-- // make sure '.' is part of literal (line cannot have changed)
s.number('.')
break
}
if c == '.' {
c = s.getr()
if c == '.' {
s.tok = _DotDotDot
break
}
s.ungetr()
s.source.r0-- // make next ungetr work (line cannot have changed)
}
s.ungetr()
s.tok = _Dot
case '+':
s.op, s.prec = Add, precAdd
c = s.getr()
if c != '+' {
goto assignop
}
s.nlsemi = true
s.tok = _IncOp
case '-':
s.op, s.prec = Sub, precAdd
c = s.getr()
if c != '-' {
goto assignop
}
s.nlsemi = true
s.tok = _IncOp
case '*':
s.op, s.prec = Mul, precMul
// don't goto assignop - want _Star token
if s.getr() == '=' {
s.tok = _AssignOp
break
}
s.ungetr()
s.tok = _Star
case '/':
c = s.getr()
if c == '/' {
s.lineComment()
goto redo
}
if c == '*' {
s.fullComment()
if s.source.line > s.line && nlsemi {
// A multi-line comment acts like a newline;
// it translates to a ';' if nlsemi is set.
s.tok = _Semi
break
}
goto redo
}
s.op, s.prec = Div, precMul
goto assignop
case '%':
s.op, s.prec = Rem, precMul
c = s.getr()
goto assignop
case '&':
c = s.getr()
if c == '&' {
s.op, s.prec = AndAnd, precAndAnd
s.tok = _Operator
break
}
s.op, s.prec = And, precMul
if c == '^' {
s.op = AndNot
c = s.getr()
}
goto assignop
case '|':
c = s.getr()
if c == '|' {
s.op, s.prec = OrOr, precOrOr
s.tok = _Operator
break
}
s.op, s.prec = Or, precAdd
goto assignop
case '~':
s.error("bitwise complement operator is ^")
fallthrough
case '^':
s.op, s.prec = Xor, precAdd
c = s.getr()
goto assignop
case '<':
c = s.getr()
if c == '=' {
s.op, s.prec = Leq, precCmp
s.tok = _Operator
break
}
if c == '<' {
s.op, s.prec = Shl, precMul
c = s.getr()
goto assignop
}
if c == '-' {
s.tok = _Arrow
break
}
s.ungetr()
s.op, s.prec = Lss, precCmp
s.tok = _Operator
case '>':
c = s.getr()
if c == '=' {
s.op, s.prec = Geq, precCmp
s.tok = _Operator
break
}
if c == '>' {
s.op, s.prec = Shr, precMul
c = s.getr()
goto assignop
}
s.ungetr()
s.op, s.prec = Gtr, precCmp
s.tok = _Operator
case '=':
if s.getr() == '=' {
s.op, s.prec = Eql, precCmp
s.tok = _Operator
break
}
s.ungetr()
s.tok = _Assign
case '!':
if s.getr() == '=' {
s.op, s.prec = Neq, precCmp
s.tok = _Operator
break
}
s.ungetr()
s.op, s.prec = Not, 0
s.tok = _Operator
default:
s.tok = 0
s.error(fmt.Sprintf("invalid rune %q", c))
goto redo
}
return
assignop:
if c == '=' {
s.tok = _AssignOp
return
}
s.ungetr()
s.tok = _Operator
}
func isLetter(c rune) bool {
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
}
func isDigit(c rune) bool {
return '0' <= c && c <= '9'
}
func (s *scanner) ident() {
s.startLit()
// accelerate common case (7bit ASCII)
c := s.getr()
for isLetter(c) || isDigit(c) {
c = s.getr()
}
// general case
if c >= utf8.RuneSelf {
for unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) {
c = s.getr()
}
}
s.ungetr()
lit := s.stopLit()
// possibly a keyword
if len(lit) >= 2 {
if tok := keywordMap[hash(lit)]; tok != 0 && strbyteseql(tokstrings[tok], lit) {
s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
s.tok = tok
return
}
}
s.nlsemi = true
s.lit = string(lit)
s.tok = _Name
}
// hash is a perfect hash function for keywords.
// It assumes that s has at least length 2.
func hash(s []byte) uint {
return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
}
func strbyteseql(s string, b []byte) bool {
if len(s) == len(b) {
for i, b := range b {
if s[i] != b {
return false
}
}
return true
}
return false
}
var keywordMap [1 << 6]token // size must be power of two
func init() {
// populate keywordMap
for tok := _Break; tok <= _Var; tok++ {
h := hash([]byte(tokstrings[tok]))
if keywordMap[h] != 0 {
panic("imperfect hash")
}
keywordMap[h] = tok
}
}
func (s *scanner) number(c rune) {
s.startLit()
if c != '.' {
s.kind = IntLit // until proven otherwise
if c == '0' {
c = s.getr()
if c == 'x' || c == 'X' {
// hex
c = s.getr()
hasDigit := false
for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
c = s.getr()
hasDigit = true
}
if !hasDigit {
s.error("malformed hex constant")
}
goto done
}
// decimal 0, octal, or float
has8or9 := false
for isDigit(c) {
if c > '7' {
has8or9 = true
}
c = s.getr()
}
if c != '.' && c != 'e' && c != 'E' && c != 'i' {
// octal
if has8or9 {
s.error("malformed octal constant")
}
goto done
}
} else {
// decimal or float
for isDigit(c) {
c = s.getr()
}
}
}
// float
if c == '.' {
s.kind = FloatLit
c = s.getr()
for isDigit(c) {
c = s.getr()
}
}
// exponent
if c == 'e' || c == 'E' {
s.kind = FloatLit
c = s.getr()
if c == '-' || c == '+' {
c = s.getr()
}
if !isDigit(c) {
s.error("malformed floating-point constant exponent")
}
for isDigit(c) {
c = s.getr()
}
}
// complex
if c == 'i' {
s.kind = ImagLit
s.getr()
}
done:
s.ungetr()
s.nlsemi = true
s.lit = string(s.stopLit())
s.tok = _Literal
}
func (s *scanner) stdString() {
s.startLit()
for {
r := s.getr()
if r == '"' {
break
}
if r == '\\' {
s.escape('"')
continue
}
if r == '\n' {
s.ungetr() // assume newline is not part of literal
s.error("newline in string")
break
}
if r < 0 {
s.error_at(s.pos, s.line, "string not terminated")
break
}
}
s.nlsemi = true
s.lit = string(s.stopLit())
s.kind = StringLit
s.tok = _Literal
}
func (s *scanner) rawString() {
s.startLit()
for {
r := s.getr()
if r == '`' {
break
}
if r < 0 {
s.error_at(s.pos, s.line, "string not terminated")
break
}
}
// We leave CRs in the string since they are part of the
// literal (even though they are not part of the literal
// value).
s.nlsemi = true
s.lit = string(s.stopLit())
s.kind = StringLit
s.tok = _Literal
}
func (s *scanner) rune() {
s.startLit()
r := s.getr()
if r == '\'' {
s.error("empty character literal")
} else if r == '\n' {
s.ungetr() // assume newline is not part of literal
s.error("newline in character literal")
} else {
ok := true
if r == '\\' {
ok = s.escape('\'')
}
r = s.getr()
if r != '\'' {
// only report error if we're ok so far
if ok {
s.error("missing '")
}
s.ungetr()
}
}
s.nlsemi = true
s.lit = string(s.stopLit())
s.kind = RuneLit
s.tok = _Literal
}
func (s *scanner) lineComment() {
// recognize pragmas
var prefix string
r := s.getr()
switch r {
case 'g':
prefix = "go:"
case 'l':
prefix = "line "
default:
goto skip
}
s.startLit()
for _, m := range prefix {
if r != m {
s.stopLit()
goto skip
}
r = s.getr()
}
for r >= 0 {
if r == '\n' {
s.ungetr()
break
}
r = s.getr()
}
s.pragmas = append(s.pragmas, Pragma{
Line: s.line,
Text: strings.TrimSuffix(string(s.stopLit()), "\r"),
})
return
skip:
// consume line
for r != '\n' && r >= 0 {
r = s.getr()
}
s.ungetr() // don't consume '\n' - needed for nlsemi logic
}
func (s *scanner) fullComment() {
for {
r := s.getr()
for r == '*' {
r = s.getr()
if r == '/' {
return
}
}
if r < 0 {
s.error_at(s.pos, s.line, "comment not terminated")
return
}
}
}
func (s *scanner) escape(quote rune) bool {
var n int
var base, max uint32
c := s.getr()
switch c {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
return true
case '0', '1', '2', '3', '4', '5', '6', '7':
n, base, max = 3, 8, 255
case 'x':
c = s.getr()
n, base, max = 2, 16, 255
case 'u':
c = s.getr()
n, base, max = 4, 16, unicode.MaxRune
case 'U':
c = s.getr()
n, base, max = 8, 16, unicode.MaxRune
default:
if c < 0 {
return true // complain in caller about EOF
}
s.error("unknown escape sequence")
return false
}
var x uint32
for i := n; i > 0; i-- {
d := base
switch {
case isDigit(c):
d = uint32(c) - '0'
case 'a' <= c && c <= 'f':
d = uint32(c) - ('a' - 10)
case 'A' <= c && c <= 'F':
d = uint32(c) - ('A' - 10)
}
if d >= base {
if c < 0 {
return true // complain in caller about EOF
}
if c != quote {
s.error(fmt.Sprintf("illegal character %#U in escape sequence", c))
} else {
s.error("escape sequence incomplete")
}
s.ungetr()
return false
}
// d < base
x = x*base + d
c = s.getr()
}
s.ungetr()
if x > max && n == 3 {
s.error(fmt.Sprintf("octal escape value > 255: %d", x))
return false
}
if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
s.error("escape sequence is invalid Unicode code point")
return false
}
return true
}
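
Since the scanner type is unexported, it can only be driven from inside the package, as TestScanner in scanner_test.go below does for a whole file. A small in-package sketch over an in-memory source, reusing the bytesReader helper from syntax.go (scanExample is a hypothetical name):

// In package syntax; imports "fmt".
func scanExample(src string) {
	var s scanner
	s.init(&bytesReader{[]byte(src)}, nil) // nil handler: low-level errors panic
	for {
		s.next()
		if s.tok == _EOF {
			break
		}
		switch s.tok {
		case _Name, _Literal:
			fmt.Println(s.line, s.tok, s.lit)
		default:
			fmt.Println(s.line, s.tok)
		}
	}
}

// scanExample("x := 42\n") reports, roughly: name "x", ":=", literal "42",
// then ";" for the newline (nlsemi is set after names and literals).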

@@ -0,0 +1,354 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"os"
"testing"
)
func TestScanner(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
src, err := os.Open("parser.go")
if err != nil {
t.Fatal(err)
}
defer src.Close()
var s scanner
s.init(src, nil)
for {
s.next()
if s.tok == _EOF {
break
}
switch s.tok {
case _Name:
fmt.Println(s.line, s.tok, "=>", s.lit)
case _Operator:
fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
default:
fmt.Println(s.line, s.tok)
}
}
}
func TestTokens(t *testing.T) {
// make source
var buf []byte
for i, s := range sampleTokens {
buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation
buf = append(buf, s.src...) // token
buf = append(buf, "        "[:i&7]...) // trailing spaces
buf = append(buf, "/* foo */ // bar\n"...) // comments
}
// scan source
var got scanner
got.init(&bytesReader{buf}, nil)
got.next()
for i, want := range sampleTokens {
nlsemi := false
if got.line != i+1 {
t.Errorf("got line %d; want %d", got.line, i+1)
}
if got.tok != want.tok {
t.Errorf("got tok = %s; want %s", got.tok, want.tok)
continue
}
switch want.tok {
case _Name, _Literal:
if got.lit != want.src {
t.Errorf("got lit = %q; want %q", got.lit, want.src)
continue
}
nlsemi = true
case _Operator, _AssignOp, _IncOp:
if got.op != want.op {
t.Errorf("got op = %s; want %s", got.op, want.op)
continue
}
if got.prec != want.prec {
t.Errorf("got prec = %s; want %s", got.prec, want.prec)
continue
}
nlsemi = want.tok == _IncOp
case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
nlsemi = true
}
if nlsemi {
got.next()
if got.tok != _Semi {
t.Errorf("got tok = %s; want ;", got.tok)
continue
}
}
got.next()
}
if got.tok != _EOF {
t.Errorf("got %q; want _EOF", got.tok)
}
}
var sampleTokens = [...]struct {
tok token
src string
op Operator
prec int
}{
// name samples
{_Name, "x", 0, 0},
{_Name, "X123", 0, 0},
{_Name, "foo", 0, 0},
{_Name, "Foo123", 0, 0},
{_Name, "foo_bar", 0, 0},
{_Name, "_", 0, 0},
{_Name, "_foobar", 0, 0},
{_Name, "a۰۱۸", 0, 0},
{_Name, "foo६४", 0, 0},
{_Name, "bar", 0, 0},
{_Name, "ŝ", 0, 0},
{_Name, "ŝfoo", 0, 0},
// literal samples
{_Literal, "0", 0, 0},
{_Literal, "1", 0, 0},
{_Literal, "12345", 0, 0},
{_Literal, "123456789012345678890123456789012345678890", 0, 0},
{_Literal, "01234567", 0, 0},
{_Literal, "0x0", 0, 0},
{_Literal, "0xcafebabe", 0, 0},
{_Literal, "0.", 0, 0},
{_Literal, "0.e0", 0, 0},
{_Literal, "0.e-1", 0, 0},
{_Literal, "0.e+123", 0, 0},
{_Literal, ".0", 0, 0},
{_Literal, ".0E00", 0, 0},
{_Literal, ".0E-0123", 0, 0},
{_Literal, ".0E+12345678901234567890", 0, 0},
{_Literal, ".45e1", 0, 0},
{_Literal, "3.14159265", 0, 0},
{_Literal, "1e0", 0, 0},
{_Literal, "1e+100", 0, 0},
{_Literal, "1e-100", 0, 0},
{_Literal, "2.71828e-1000", 0, 0},
{_Literal, "0i", 0, 0},
{_Literal, "1i", 0, 0},
{_Literal, "012345678901234567889i", 0, 0},
{_Literal, "123456789012345678890i", 0, 0},
{_Literal, "0.i", 0, 0},
{_Literal, ".0i", 0, 0},
{_Literal, "3.14159265i", 0, 0},
{_Literal, "1e0i", 0, 0},
{_Literal, "1e+100i", 0, 0},
{_Literal, "1e-100i", 0, 0},
{_Literal, "2.71828e-1000i", 0, 0},
{_Literal, "'a'", 0, 0},
{_Literal, "'\\000'", 0, 0},
{_Literal, "'\\xFF'", 0, 0},
{_Literal, "'\\uff16'", 0, 0},
{_Literal, "'\\U0000ff16'", 0, 0},
{_Literal, "`foobar`", 0, 0},
{_Literal, "`foo\tbar`", 0, 0},
{_Literal, "`\r`", 0, 0},
// operators
{_Operator, "||", OrOr, precOrOr},
{_Operator, "&&", AndAnd, precAndAnd},
{_Operator, "==", Eql, precCmp},
{_Operator, "!=", Neq, precCmp},
{_Operator, "<", Lss, precCmp},
{_Operator, "<=", Leq, precCmp},
{_Operator, ">", Gtr, precCmp},
{_Operator, ">=", Geq, precCmp},
{_Operator, "+", Add, precAdd},
{_Operator, "-", Sub, precAdd},
{_Operator, "|", Or, precAdd},
{_Operator, "^", Xor, precAdd},
{_Star, "*", Mul, precMul},
{_Operator, "/", Div, precMul},
{_Operator, "%", Rem, precMul},
{_Operator, "&", And, precMul},
{_Operator, "&^", AndNot, precMul},
{_Operator, "<<", Shl, precMul},
{_Operator, ">>", Shr, precMul},
// assignment operations
{_AssignOp, "+=", Add, precAdd},
{_AssignOp, "-=", Sub, precAdd},
{_AssignOp, "|=", Or, precAdd},
{_AssignOp, "^=", Xor, precAdd},
{_AssignOp, "*=", Mul, precMul},
{_AssignOp, "/=", Div, precMul},
{_AssignOp, "%=", Rem, precMul},
{_AssignOp, "&=", And, precMul},
{_AssignOp, "&^=", AndNot, precMul},
{_AssignOp, "<<=", Shl, precMul},
{_AssignOp, ">>=", Shr, precMul},
// other operations
{_IncOp, "++", Add, precAdd},
{_IncOp, "--", Sub, precAdd},
{_Assign, "=", 0, 0},
{_Define, ":=", 0, 0},
{_Arrow, "<-", 0, 0},
// delimiters
{_Lparen, "(", 0, 0},
{_Lbrack, "[", 0, 0},
{_Lbrace, "{", 0, 0},
{_Rparen, ")", 0, 0},
{_Rbrack, "]", 0, 0},
{_Rbrace, "}", 0, 0},
{_Comma, ",", 0, 0},
{_Semi, ";", 0, 0},
{_Colon, ":", 0, 0},
{_Dot, ".", 0, 0},
{_DotDotDot, "...", 0, 0},
// keywords
{_Break, "break", 0, 0},
{_Case, "case", 0, 0},
{_Chan, "chan", 0, 0},
{_Const, "const", 0, 0},
{_Continue, "continue", 0, 0},
{_Default, "default", 0, 0},
{_Defer, "defer", 0, 0},
{_Else, "else", 0, 0},
{_Fallthrough, "fallthrough", 0, 0},
{_For, "for", 0, 0},
{_Func, "func", 0, 0},
{_Go, "go", 0, 0},
{_Goto, "goto", 0, 0},
{_If, "if", 0, 0},
{_Import, "import", 0, 0},
{_Interface, "interface", 0, 0},
{_Map, "map", 0, 0},
{_Package, "package", 0, 0},
{_Range, "range", 0, 0},
{_Return, "return", 0, 0},
{_Select, "select", 0, 0},
{_Struct, "struct", 0, 0},
{_Switch, "switch", 0, 0},
{_Type, "type", 0, 0},
{_Var, "var", 0, 0},
}
func TestScanErrors(t *testing.T) {
for _, test := range []struct {
src, msg string
pos, line int
}{
// Note: Positions for lexical errors are the earliest position
// where the error is apparent, not the beginning of the respective
// token.
// rune-level errors
{"fo\x00o", "invalid NUL character", 2, 1},
{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 4, 2},
{"foo\n\n\xff ", "invalid UTF-8 encoding", 5, 3},
// token-level errors
{"x + ~y", "bitwise complement operator is ^", 4, 1},
{"foo$bar = 0", "invalid rune '$'", 3, 1},
{"const x = 0xyz", "malformed hex constant", 12, 1},
{"0123456789", "malformed octal constant", 10, 1},
{"0123456789. /* foobar", "comment not terminated", 12, 1}, // valid float constant
{"0123456789e0 /*\nfoobar", "comment not terminated", 13, 1}, // valid float constant
{"var a, b = 08, 07\n", "malformed octal constant", 13, 1},
{"(x + 1.0e+x)", "malformed floating-point constant exponent", 10, 1},
{`''`, "empty character literal", 1, 1},
{"'\n", "newline in character literal", 1, 1},
{`'\`, "missing '", 2, 1},
{`'\'`, "missing '", 3, 1},
{`'\x`, "missing '", 3, 1},
{`'\x'`, "escape sequence incomplete", 3, 1},
{`'\y'`, "unknown escape sequence", 2, 1},
{`'\x0'`, "escape sequence incomplete", 4, 1},
{`'\00'`, "escape sequence incomplete", 4, 1},
{`'\377' /*`, "comment not terminated", 7, 1}, // valid octal escape
{`'\378`, "illegal character U+0038 '8' in escape sequence", 4, 1},
{`'\400'`, "octal escape value > 255: 256", 5, 1},
{`'xx`, "missing '", 2, 1},
{"\"\n", "newline in string", 1, 1},
{`"`, "string not terminated", 0, 1},
{`"foo`, "string not terminated", 0, 1},
{"`", "string not terminated", 0, 1},
{"`foo", "string not terminated", 0, 1},
{"/*/", "comment not terminated", 0, 1},
{"/*\n\nfoo", "comment not terminated", 0, 1},
{"/*\n\nfoo", "comment not terminated", 0, 1},
{`"\`, "string not terminated", 0, 1},
{`"\"`, "string not terminated", 0, 1},
{`"\x`, "string not terminated", 0, 1},
{`"\x"`, "escape sequence incomplete", 3, 1},
{`"\y"`, "unknown escape sequence", 2, 1},
{`"\x0"`, "escape sequence incomplete", 4, 1},
{`"\00"`, "escape sequence incomplete", 4, 1},
{`"\377" /*`, "comment not terminated", 7, 1}, // valid octal escape
{`"\378"`, "illegal character U+0038 '8' in escape sequence", 4, 1},
{`"\400"`, "octal escape value > 255: 256", 5, 1},
{`s := "foo\z"`, "unknown escape sequence", 10, 1},
{`s := "foo\z00\nbar"`, "unknown escape sequence", 10, 1},
{`"\x`, "string not terminated", 0, 1},
{`"\x"`, "escape sequence incomplete", 3, 1},
{`var s string = "\x"`, "escape sequence incomplete", 18, 1},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 18, 1},
// former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 11, 3},
} {
var s scanner
nerrors := 0
s.init(&bytesReader{[]byte(test.src)}, func(pos, line int, msg string) {
nerrors++
// only check the first error
if nerrors == 1 {
if msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
}
if pos != test.pos {
t.Errorf("%q: got pos = %d; want %d", test.src, pos, test.pos)
}
if line != test.line {
t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
}
} else if nerrors > 1 {
t.Errorf("%q: got unexpected %q at pos = %d, line = %d", test.src, msg, pos, line)
}
})
for {
s.next()
if s.tok == _EOF {
break
}
}
if nerrors == 0 {
t.Errorf("%q: got no error; want %q", test.src, test.msg)
}
}
}

@@ -0,0 +1,177 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"io"
"unicode/utf8"
)
// buf [...read...|...|...unread...|s|...free...]
//        ^        ^   ^            ^
//        |        |   |            |
//        suf      r0  r            w
type source struct {
src io.Reader
errh ErrorHandler
// source buffer
buf [4 << 10]byte
offs int // source offset of buf
r0, r, w int // previous/current read and write buf positions, excluding sentinel
line0, line int // previous/current line
err error // pending io error
// literal buffer
lit []byte // literal prefix
suf int // literal suffix; suf >= 0 means we are scanning a literal
}
func (s *source) init(src io.Reader, errh ErrorHandler) {
s.src = src
s.errh = errh
s.buf[0] = utf8.RuneSelf // terminate with sentinel
s.offs = 0
s.r0, s.r, s.w = 0, 0, 0
s.line0, s.line = 1, 1
s.err = nil
s.lit = s.lit[:0]
s.suf = -1
}
func (s *source) error(msg string) {
s.error_at(s.pos0(), s.line0, msg)
}
func (s *source) error_at(pos, line int, msg string) {
if s.errh != nil {
s.errh(pos, line, msg)
return
}
panic(fmt.Sprintf("%d: %s", line, msg))
}
// pos0 returns the byte position of the last character read.
func (s *source) pos0() int {
return s.offs + s.r0
}
func (s *source) ungetr() {
s.r, s.line = s.r0, s.line0
}
func (s *source) getr() rune {
redo:
s.r0, s.line0 = s.r, s.line
// We could avoid at least one test that is always taken in the
// for loop below by duplicating the common case code (ASCII)
// here since we always have at least the sentinel (utf8.RuneSelf)
// in the buffer. Measure and optimize if necessary.
// make sure we have at least one rune in buffer, or we are at EOF
for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
s.fill() // s.w-s.r < len(s.buf) => buffer is not full
}
// common case: ASCII and enough bytes
// (invariant: s.buf[s.w] == utf8.RuneSelf)
if b := s.buf[s.r]; b < utf8.RuneSelf {
s.r++
if b == 0 {
s.error("invalid NUL character")
goto redo
}
if b == '\n' {
s.line++
}
return rune(b)
}
// EOF
if s.r == s.w {
if s.err != io.EOF {
s.error(s.err.Error())
}
return -1
}
// uncommon case: not ASCII
r, w := utf8.DecodeRune(s.buf[s.r:s.w])
s.r += w
if r == utf8.RuneError && w == 1 {
s.error("invalid UTF-8 encoding")
goto redo
}
// BOMs are only allowed as the first character in a file
const BOM = 0xfeff
if r == BOM {
if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
s.error("invalid BOM in the middle of the file")
}
goto redo
}
return r
}
func (s *source) fill() {
// Slide unread bytes to beginning but preserve last read char
// (for one ungetr call) plus one extra byte (for a 2nd ungetr
// call, only for ".." character sequence and float literals
// starting with ".").
if s.r0 > 1 {
// save literal prefix, if any
// (We see at most one ungetr call while reading
// a literal, so make sure s.r0 remains in buf.)
if s.suf >= 0 {
s.lit = append(s.lit, s.buf[s.suf:s.r0]...)
s.suf = 1 // == s.r0 after slide below
}
s.offs += s.r0 - 1
r := s.r - s.r0 + 1 // last read char plus one byte
s.w = r + copy(s.buf[r:], s.buf[s.r:s.w])
s.r = r
s.r0 = 1
}
// read more data: try a limited number of times
for i := 100; i > 0; i-- {
n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel
if n < 0 {
panic("negative read") // incorrect underlying io.Reader implementation
}
s.w += n
if n > 0 || err != nil {
s.buf[s.w] = utf8.RuneSelf // sentinel
if err != nil {
s.err = err
}
return
}
}
s.err = io.ErrNoProgress
}
func (s *source) startLit() {
s.suf = s.r0
s.lit = s.lit[:0] // reuse lit
}
func (s *source) stopLit() []byte {
lit := s.buf[s.suf:s.r]
if len(s.lit) > 0 {
lit = append(s.lit, lit...)
}
s.suf = -1 // no pending literal
return lit
}
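
getr's fast path relies on the utf8.RuneSelf sentinel that init and fill keep at buf[w]: the common ASCII case is a single byte comparison with no bounds check. A standalone sketch of the same sentinel idea, detached from this scanner (countASCIILetters is illustrative only):

package main

import (
	"fmt"
	"unicode/utf8"
)

// countASCIILetters scans a sentinel-terminated buffer: the loop needs
// no explicit bounds check because buf always ends in utf8.RuneSelf,
// which fails the b < utf8.RuneSelf test.
func countASCIILetters(src string) int {
	buf := append([]byte(src), utf8.RuneSelf) // sentinel
	n, i := 0, 0
	for {
		b := buf[i]
		if b >= utf8.RuneSelf {
			return n // sentinel (or a non-ASCII byte) ends the fast path
		}
		if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' {
			n++
		}
		i++
	}
}

func main() {
	fmt.Println(countASCIILetters("foo_bar 123")) // 6
}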

@@ -0,0 +1,61 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"io"
"os"
)
type Mode uint
type ErrorHandler func(pos, line int, msg string)
// TODO(gri) These need a lot more work.
func ReadFile(filename string, errh ErrorHandler, mode Mode) (*File, error) {
src, err := os.Open(filename)
if err != nil {
return nil, err
}
defer src.Close()
return Read(src, errh, mode)
}
type bytesReader struct {
data []byte
}
func (r *bytesReader) Read(p []byte) (int, error) {
if len(r.data) > 0 {
n := copy(p, r.data)
r.data = r.data[n:]
return n, nil
}
return 0, io.EOF
}
func ReadBytes(src []byte, errh ErrorHandler, mode Mode) (*File, error) {
return Read(&bytesReader{src}, errh, mode)
}
func Read(src io.Reader, errh ErrorHandler, mode Mode) (*File, error) {
var p parser
p.init(src, errh)
p.next()
ast := p.file()
if errh == nil && p.nerrors > 0 {
return nil, fmt.Errorf("%d syntax errors", p.nerrors)
}
return ast, nil
}
func Write(w io.Writer, n *File) error {
panic("unimplemented")
}
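
ReadBytes plus an ErrorHandler gives error-tolerant parsing of in-memory source: with a non-nil handler, problems are reported through the callback and Read still returns the tree it built, while a nil handler makes Read return an error once any syntax errors occurred. A sketch with the same internal-import caveat as above (the source text and the expected counts are illustrative):

package main

// Sketch only: cmd/compile/internal/syntax is not importable from
// outside cmd/compile; the source text below is made up.
import (
	"fmt"

	"cmd/compile/internal/syntax"
)

func main() {
	src := []byte("package p\n\nvar x int\n\nfunc f() int { return x }\n")

	// A non-nil handler receives each error as it is encountered.
	errh := func(pos, line int, msg string) {
		fmt.Printf("line %d (pos %d): %s\n", line, pos, msg)
	}

	ast, err := syntax.ReadBytes(src, errh, 0)
	if err != nil {
		panic(err)
	}
	fmt.Println("declarations:", len(ast.DeclList)) // expect 2: var x, func f
	fmt.Println("lines:", ast.Lines)
}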

@@ -0,0 +1,263 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import "fmt"
type token uint
const (
_ token = iota
_EOF
// names and literals
_Name
_Literal
// operators and operations
_Operator // excluding '*' (_Star)
_AssignOp
_IncOp
_Assign
_Define
_Arrow
_Star
// delimiters
_Lparen
_Lbrack
_Lbrace
_Rparen
_Rbrack
_Rbrace
_Comma
_Semi
_Colon
_Dot
_DotDotDot
// keywords
_Break
_Case
_Chan
_Const
_Continue
_Default
_Defer
_Else
_Fallthrough
_For
_Func
_Go
_Goto
_If
_Import
_Interface
_Map
_Package
_Range
_Return
_Select
_Struct
_Switch
_Type
_Var
tokenCount
)
const (
// for BranchStmt
Break = _Break
Continue = _Continue
Fallthrough = _Fallthrough
Goto = _Goto
// for CallStmt
Go = _Go
Defer = _Defer
)
var tokstrings = [...]string{
// source control
_EOF: "EOF",
// names and literals
_Name: "name",
_Literal: "literal",
// operators and operations
_Operator: "op",
_AssignOp: "op=",
_IncOp: "opop",
_Assign: "=",
_Define: ":=",
_Arrow: "<-",
_Star: "*",
// delimiters
_Lparen: "(",
_Lbrack: "[",
_Lbrace: "{",
_Rparen: ")",
_Rbrack: "]",
_Rbrace: "}",
_Comma: ",",
_Semi: ";",
_Colon: ":",
_Dot: ".",
_DotDotDot: "...",
// keywords
_Break: "break",
_Case: "case",
_Chan: "chan",
_Const: "const",
_Continue: "continue",
_Default: "default",
_Defer: "defer",
_Else: "else",
_Fallthrough: "fallthrough",
_For: "for",
_Func: "func",
_Go: "go",
_Goto: "goto",
_If: "if",
_Import: "import",
_Interface: "interface",
_Map: "map",
_Package: "package",
_Range: "range",
_Return: "return",
_Select: "select",
_Struct: "struct",
_Switch: "switch",
_Type: "type",
_Var: "var",
}
func (tok token) String() string {
var s string
if 0 <= tok && int(tok) < len(tokstrings) {
s = tokstrings[tok]
}
if s == "" {
s = fmt.Sprintf("<tok-%d>", tok)
}
return s
}
// Make sure we have at most 64 tokens so we can use them in a set.
const _ uint64 = 1 << (tokenCount - 1)
// contains reports whether tok is in tokset.
func contains(tokset uint64, tok token) bool {
return tokset&(1<<tok) != 0
}
type LitKind uint
const (
IntLit LitKind = iota
FloatLit
ImagLit
RuneLit
StringLit
)
type Operator uint
const (
_ Operator = iota
Def // :=
Not // !
Recv // <-
// precOrOr
OrOr // ||
// precAndAnd
AndAnd // &&
// precCmp
Eql // ==
Neq // !=
Lss // <
Leq // <=
Gtr // >
Geq // >=
// precAdd
Add // +
Sub // -
Or // |
Xor // ^
// precMul
Mul // *
Div // /
Rem // %
And // &
AndNot // &^
Shl // <<
Shr // >>
)
var opstrings = [...]string{
// prec == 0
Def: ":", // : in :=
Not: "!",
Recv: "<-",
// precOrOr
OrOr: "||",
// precAndAnd
AndAnd: "&&",
// precCmp
Eql: "==",
Neq: "!=",
Lss: "<",
Leq: "<=",
Gtr: ">",
Geq: ">=",
// precAdd
Add: "+",
Sub: "-",
Or: "|",
Xor: "^",
// precMul
Mul: "*",
Div: "/",
Rem: "%",
And: "&",
AndNot: "&^",
Shl: "<<",
Shr: ">>",
}
func (op Operator) String() string {
var s string
if 0 <= op && int(op) < len(opstrings) {
s = opstrings[op]
}
if s == "" {
s = fmt.Sprintf("<op-%d>", op)
}
return s
}
// Operator precedences
const (
_ = iota
precOrOr
precAndAnd
precCmp
precAdd
precMul
)
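
The compile-time assertion above (1 << (tokenCount - 1) must fit in a uint64) is what lets token sets be plain uint64 bit masks queried with contains; the scanner's ident uses exactly this for the keywords that set nlsemi. A tiny in-package sketch (stmtEndTokens and endsStmt are hypothetical names, not part of this commit):

// In package syntax: a token set is a plain uint64 bit mask.
const stmtEndTokens uint64 = 1<<_Break | 1<<_Continue | 1<<_Fallthrough | 1<<_Return

func endsStmt(tok token) bool {
	return contains(stmtEndTokens, tok)
}

// endsStmt(_Return) == true, endsStmt(_If) == false.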