From 54fe57bc22f7890810bbddae2499eda8d4acfaef Mon Sep 17 00:00:00 2001 From: Matthew Dempsky Date: Thu, 17 Jun 2021 01:12:23 -0700 Subject: [PATCH] [dev.typeparams] cmd/compile: record writer's stack at export data sync points This CL extends the unified export data format's existing sync mechanism to save writer stacks, controlled by the -d=syncframes debug flag. This allows readers to provide more details when reporting desync errors, which should simplify development of the data format and the various reader/writer implementations. For example, CL 328051 updated reader and writer, but missed making a similar change to the linker (fix in CL 328054). Re-reviewing the CL in isolation after the failure, it was not immediately obvious what was going wrong. But the pair of stack traces below identifies exactly what happened: it should have updated linker.relocFuncExt to write out the new sync marker too. ``` data sync error: package "internal/abi", section 6, index 4, offset 536 found UseReloc, written at: /home/mdempsky/wd/go/src/cmd/compile/internal/noder/encoder.go:221: (*encoder).reloc +0x44 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/linker.go:214: (*linker).relocFuncExt +0x580 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/linker.go:233: (*linker).relocTypeExt +0x234 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/linker.go:161: (*linker).relocObj +0x2198 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/linker.go:64: (*linker).relocIdx +0x196 expected ImplicitTypes, reading at: /home/mdempsky/wd/go/src/cmd/compile/internal/noder/reader.go:796: (*reader).implicitTypes +0x36 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/reader.go:810: (*reader).addBody +0x81 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/reader.go:727: (*reader).funcExt +0x542 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/reader.go:651: (*reader).method +0x324 /home/mdempsky/wd/go/src/cmd/compile/internal/noder/reader.go:557: (*pkgReader).objIdx +0x2704 ``` Change-Id: I911193edd2a965f81b7459f15fb613a773584685 Reviewed-on: https://go-review.googlesource.com/c/go/+/328909 Run-TryBot: Matthew Dempsky TryBot-Result: Go Bot Trust: Matthew Dempsky Reviewed-by: Cuong Manh Le Reviewed-by: Robert Griesemer --- src/cmd/compile/internal/base/debug.go | 1 + src/cmd/compile/internal/noder/decoder.go | 100 ++++++++++++++---- src/cmd/compile/internal/noder/encoder.go | 83 +++++++++++---- src/cmd/compile/internal/noder/frames_go1.go | 20 ++++ src/cmd/compile/internal/noder/frames_go17.go | 24 +++++ src/cmd/compile/internal/noder/reader.go | 2 +- src/cmd/compile/internal/noder/sync.go | 33 +++++- src/cmd/compile/internal/noder/writer.go | 2 +- 8 files changed, 219 insertions(+), 46 deletions(-) create mode 100644 src/cmd/compile/internal/noder/frames_go1.go create mode 100644 src/cmd/compile/internal/noder/frames_go17.go diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go index 824a8baa83..fd42021946 100644 --- a/src/cmd/compile/internal/base/debug.go +++ b/src/cmd/compile/internal/base/debug.go @@ -44,6 +44,7 @@ type DebugFlags struct { Panic int `help:"show all compiler panics"` Slice int `help:"print information about slice compilation"` SoftFloat int `help:"force compiler to emit soft-float code"` + SyncFrames int `help:"how many writer stack frames to include at sync points in unified export data"` TypeAssert int `help:"print information about type assertion inlining"` TypecheckInl int `help:"eager typechecking of inline function bodies"` Unified int `help:"enable unified IR construction"` diff --git a/src/cmd/compile/internal/noder/decoder.go b/src/cmd/compile/internal/noder/decoder.go index 023388875c..3dc61c6a69 100644 --- a/src/cmd/compile/internal/noder/decoder.go +++ b/src/cmd/compile/internal/noder/decoder.go @@ -13,6 +13,7 @@ import ( "go/token" "math/big" "os" + "runtime" "strings" "cmd/compile/internal/base" @@ -131,17 +132,82 @@ func (r *decoder) checkErr(err error) { } } -func (r *decoder) sync(m syncMarker) { - if debug { - pos, err0 := r.data.Seek(0, os.SEEK_CUR) - x, err := r.data.ReadByte() - r.checkErr(err) - if x != byte(m) { - // TODO(mdempsky): Revisit this error message, and make it more - // useful (e.g., include r.p.pkgPath). - base.Fatalf("data sync error: found %v at %v (%v) in (%v:%v), but expected %v", syncMarker(x), pos, err0, r.k, r.idx, m) - } +func (r *decoder) rawUvarint() uint64 { + x, err := binary.ReadUvarint(&r.data) + r.checkErr(err) + return x +} + +func (r *decoder) rawVarint() int64 { + ux := r.rawUvarint() + + // Zig-zag decode. + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x } + return x +} + +func (r *decoder) rawReloc(k reloc, idx int) int { + e := r.relocs[idx] + assert(e.kind == k) + return e.idx +} + +func (r *decoder) sync(mWant syncMarker) { + if !enableSync { + return + } + + pos, _ := r.data.Seek(0, os.SEEK_CUR) // TODO(mdempsky): io.SeekCurrent after #44505 is resolved + mHave := syncMarker(r.rawUvarint()) + writerPCs := make([]int, r.rawUvarint()) + for i := range writerPCs { + writerPCs[i] = int(r.rawUvarint()) + } + + if mHave == mWant { + return + } + + // There's some tension here between printing: + // + // (1) full file paths that tools can recognize (e.g., so emacs + // hyperlinks the "file:line" text for easy navigation), or + // + // (2) short file paths that are easier for humans to read (e.g., by + // omitting redundant or irrelevant details, so it's easier to + // focus on the useful bits that remain). + // + // The current formatting favors the former, as it seems more + // helpful in practice. But perhaps the formatting could be improved + // to better address both concerns. For example, use relative file + // paths if they would be shorter, or rewrite file paths to contain + // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how + // to reliably expand that again. + + fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.idx, pos) + + fmt.Printf("\nfound %v, written at:\n", mHave) + if len(writerPCs) == 0 { + fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) + } + for _, pc := range writerPCs { + fmt.Printf("\t%s\n", r.common.stringIdx(r.rawReloc(relocString, pc))) + } + + fmt.Printf("\nexpected %v, reading at:\n", mWant) + var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? + n := runtime.Callers(2, readerPCs[:]) + for _, pc := range fmtFrames(readerPCs[:n]...) { + fmt.Printf("\t%s\n", pc) + } + + // We already printed a stack trace for the reader, so now we can + // simply exit. Printing a second one with panic or base.Fatalf + // would just be noise. + os.Exit(1) } func (r *decoder) bool() bool { @@ -154,16 +220,12 @@ func (r *decoder) bool() bool { func (r *decoder) int64() int64 { r.sync(syncInt64) - x, err := binary.ReadVarint(&r.data) - r.checkErr(err) - return x + return r.rawVarint() } func (r *decoder) uint64() uint64 { r.sync(syncUint64) - x, err := binary.ReadUvarint(&r.data) - r.checkErr(err) - return x + return r.rawUvarint() } func (r *decoder) len() int { x := r.uint64(); v := int(x); assert(uint64(v) == x); return v } @@ -177,11 +239,7 @@ func (r *decoder) code(mark syncMarker) int { func (r *decoder) reloc(k reloc) int { r.sync(syncUseReloc) - idx := r.len() - - e := r.relocs[idx] - assert(e.kind == k) - return e.idx + return r.rawReloc(k, r.len()) } func (r *decoder) string() string { diff --git a/src/cmd/compile/internal/noder/encoder.go b/src/cmd/compile/internal/noder/encoder.go index dc288dc29f..d8ab0f6255 100644 --- a/src/cmd/compile/internal/noder/encoder.go +++ b/src/cmd/compile/internal/noder/encoder.go @@ -13,6 +13,7 @@ import ( "go/constant" "io" "math/big" + "runtime" "cmd/compile/internal/base" ) @@ -93,6 +94,8 @@ type encoder struct { relocs []relocEnt data bytes.Buffer + encodingRelocHeader bool + k reloc idx int } @@ -107,6 +110,10 @@ func (w *encoder) flush() int { // TODO(mdempsky): Consider writing these out separately so they're // easier to strip, along with function bodies, so that we can prune // down to just the data that's relevant to go/types. + if w.encodingRelocHeader { + base.Fatalf("encodingRelocHeader already true; recursive flush?") + } + w.encodingRelocHeader = true w.sync(syncRelocs) w.len(len(w.relocs)) for _, rent := range w.relocs { @@ -128,10 +135,58 @@ func (w *encoder) checkErr(err error) { } } +func (w *encoder) rawUvarint(x uint64) { + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], x) + _, err := w.data.Write(buf[:n]) + w.checkErr(err) +} + +func (w *encoder) rawVarint(x int64) { + // Zig-zag encode. + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + + w.rawUvarint(ux) +} + +func (w *encoder) rawReloc(r reloc, idx int) int { + // TODO(mdempsky): Use map for lookup. + for i, rent := range w.relocs { + if rent.kind == r && rent.idx == idx { + return i + } + } + + i := len(w.relocs) + w.relocs = append(w.relocs, relocEnt{r, idx}) + return i +} + func (w *encoder) sync(m syncMarker) { - if debug { - err := w.data.WriteByte(byte(m)) - w.checkErr(err) + if !enableSync { + return + } + + // Writing out stack frame string references requires working + // relocations, but writing out the relocations themselves involves + // sync markers. To prevent infinite recursion, we simply trim the + // stack frame for sync markers within the relocation header. + var frames []string + if !w.encodingRelocHeader && base.Debug.SyncFrames > 0 { + pcs := make([]uintptr, base.Debug.SyncFrames) + n := runtime.Callers(2, pcs) + frames = fmtFrames(pcs[:n]...) + } + + // TODO(mdempsky): Save space by writing out stack frames as a + // linked list so we can share common stack frames. + w.rawUvarint(uint64(m)) + w.rawUvarint(uint64(len(frames))) + for _, frame := range frames { + w.rawUvarint(uint64(w.rawReloc(relocString, w.p.stringIdx(frame)))) } } @@ -148,18 +203,12 @@ func (w *encoder) bool(b bool) bool { func (w *encoder) int64(x int64) { w.sync(syncInt64) - var buf [binary.MaxVarintLen64]byte - n := binary.PutVarint(buf[:], x) - _, err := w.data.Write(buf[:n]) - w.checkErr(err) + w.rawVarint(x) } func (w *encoder) uint64(x uint64) { w.sync(syncUint64) - var buf [binary.MaxVarintLen64]byte - n := binary.PutUvarint(buf[:], x) - _, err := w.data.Write(buf[:n]) - w.checkErr(err) + w.rawUvarint(x) } func (w *encoder) len(x int) { assert(x >= 0); w.uint64(uint64(x)) } @@ -168,17 +217,7 @@ func (w *encoder) uint(x uint) { w.uint64(uint64(x)) } func (w *encoder) reloc(r reloc, idx int) { w.sync(syncUseReloc) - - // TODO(mdempsky): Use map for lookup. - for i, rent := range w.relocs { - if rent.kind == r && rent.idx == idx { - w.len(i) - return - } - } - - w.len(len(w.relocs)) - w.relocs = append(w.relocs, relocEnt{r, idx}) + w.len(w.rawReloc(r, idx)) } func (w *encoder) code(c code) { diff --git a/src/cmd/compile/internal/noder/frames_go1.go b/src/cmd/compile/internal/noder/frames_go1.go new file mode 100644 index 0000000000..2958efd622 --- /dev/null +++ b/src/cmd/compile/internal/noder/frames_go1.go @@ -0,0 +1,20 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !go1.7 + +// TODO(mdempsky): Remove after #44505 is resolved + +package noder + +import "runtime" + +func walkFrames(pcs []uintptr, visit frameVisitor) { + for _, pc := range pcs { + fn := runtime.FuncForPC(pc) + file, line := fn.FileLine(pc) + + visit(file, line, fn.Name(), pc-fn.Entry()) + } +} diff --git a/src/cmd/compile/internal/noder/frames_go17.go b/src/cmd/compile/internal/noder/frames_go17.go new file mode 100644 index 0000000000..273217e39a --- /dev/null +++ b/src/cmd/compile/internal/noder/frames_go17.go @@ -0,0 +1,24 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.7 + +package noder + +import "runtime" + +func walkFrames(pcs []uintptr, visit frameVisitor) { + if len(pcs) == 0 { + return + } + + frames := runtime.CallersFrames(pcs) + for { + frame, more := frames.Next() + visit(frame.File, frame.Line, frame.Function, frame.PC-frame.Entry) + if !more { + return + } + } +} diff --git a/src/cmd/compile/internal/noder/reader.go b/src/cmd/compile/internal/noder/reader.go index 803acaa88d..efa607e13b 100644 --- a/src/cmd/compile/internal/noder/reader.go +++ b/src/cmd/compile/internal/noder/reader.go @@ -910,7 +910,7 @@ func (r *reader) addLocal(name *ir.Name, ctxt ir.Class) { assert(ctxt == ir.PAUTO || ctxt == ir.PPARAM || ctxt == ir.PPARAMOUT) r.sync(syncAddLocal) - if debug { + if enableSync { want := r.int() if have := len(r.locals); have != want { base.FatalfAt(name.Pos(), "locals table has desynced") diff --git a/src/cmd/compile/internal/noder/sync.go b/src/cmd/compile/internal/noder/sync.go index 7326a6edbe..aef98dbd78 100644 --- a/src/cmd/compile/internal/noder/sync.go +++ b/src/cmd/compile/internal/noder/sync.go @@ -6,8 +6,39 @@ package noder -const debug = true +import ( + "fmt" + "strings" +) +// enableSync controls whether sync markers are written into unified +// IR's export data format and also whether they're expected when +// reading them back in. They're inessential to the correct +// functioning of unified IR, but are helpful during development to +// detect mistakes. +// +// When sync is enabled, writer stack frames will also be included in +// the export data. Currently, a fixed number of frames are included, +// controlled by -d=syncframes (default 0). +const enableSync = true + +// fmtFrames formats a backtrace for reporting reader/writer desyncs. +func fmtFrames(pcs ...uintptr) []string { + res := make([]string, 0, len(pcs)) + walkFrames(pcs, func(file string, line int, name string, offset uintptr) { + // Trim package from function name. It's just redundant noise. + name = strings.TrimPrefix(name, "cmd/compile/internal/noder.") + + res = append(res, fmt.Sprintf("%s:%v: %s +0x%v", file, line, name, offset)) + }) + return res +} + +type frameVisitor func(file string, line int, name string, offset uintptr) + +// syncMarker is an enum type that represents markers that may be +// written to export data to ensure the reader and writer stay +// synchronized. type syncMarker int //go:generate stringer -type=syncMarker -trimprefix=sync diff --git a/src/cmd/compile/internal/noder/writer.go b/src/cmd/compile/internal/noder/writer.go index 1475540d84..cc749b0d1e 100644 --- a/src/cmd/compile/internal/noder/writer.go +++ b/src/cmd/compile/internal/noder/writer.go @@ -733,7 +733,7 @@ func (w *writer) funcarg(param *types2.Var, result bool) { func (w *writer) addLocal(obj types2.Object) { w.sync(syncAddLocal) idx := len(w.localsIdx) - if debug { + if enableSync { w.int(idx) } w.localsIdx[obj] = idx