[dev.typeparams] cmd/compile: scaffolding for export data experiments

This CL adds a simple framework for augmenting the current export data
format by writing out additional data *after* the existing data, along
with an extra header line (giving the new data's size) that current
readers silently ignore.

In particular, this is used by unified IR to be able to experiment and
iterate on export data designs without having to keep the
go/internal/gcimporter and x/tools/go/gcexportdata importers in
sync. Instead, they simply continue reading the existing data written
out by typecheck/iexport.go.

Change-Id: I883211c2892e2c7dec758b85ff6bc31b244440a0
Reviewed-on: https://go-review.googlesource.com/c/go/+/327169
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Trust: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Matthew Dempsky 2021-06-11 06:33:30 -07:00
parent c93d5d1a52
commit 2954f11ead
2 changed files with 111 additions and 31 deletions
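For orientation before the diffs below: when the new writer hook is installed, the export section produced by WriteExports ends up laid out roughly as sketched here. This is only an illustration; the size value is a made-up example, while the header line, the $$B/$$ markers, and the placement of the new payload follow directly from the code in this CL:

	...object file header lines...
	newexportsize 1234
	$$B
	<existing indexed export data, as written by typecheck/iexport.go>
	$$
	<1234 bytes of new experimental export data>

A reader that understands the "newexportsize" header knows to look for the new payload; existing readers skip the unknown header line, read the indexed data between $$B and $$, and never look past the end marker.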

src/cmd/compile/internal/noder/export.go

@@ -5,22 +5,61 @@
 package noder
 
 import (
+	"bytes"
 	"fmt"
+	"io"
 
 	"cmd/compile/internal/base"
 	"cmd/compile/internal/typecheck"
 	"cmd/internal/bio"
 )
 
+// writeNewExportFunc is a hook that can be added to append extra
+// export data after the normal export data section. It allows
+// experimenting with new export data format designs without requiring
+// immediate support in the go/internal or x/tools importers.
+var writeNewExportFunc func(out io.Writer)
+
 func WriteExports(out *bio.Writer) {
+	// When unified IR exports are enabled, we simply append them to the
+	// end of the normal export data (with compiler extensions
+	// disabled), and write an extra header giving their size.
+	//
+	// If the compiler sees this header, it knows to read the new data
+	// instead; meanwhile the go/types importers will silently ignore it
+	// and continue processing the old export data.
+	//
+	// This allows us to experiment with changes to the new export data
+	// format without needing to update the go/internal/gcimporter or
+	// (worse) x/tools/go/gcexportdata importers.
+
+	useNewExport := writeNewExportFunc != nil
+
+	var old, new bytes.Buffer
+
+	typecheck.WriteExports(&old, !useNewExport)
+
+	if useNewExport {
+		writeNewExportFunc(&new)
+	}
+
+	oldLen := old.Len()
+	newLen := new.Len()
+
+	if useNewExport {
+		fmt.Fprintf(out, "\nnewexportsize %v\n", newLen)
+	}
+
 	// The linker also looks for the $$ marker - use char after $$ to distinguish format.
 	out.WriteString("\n$$B\n") // indicate binary export format
-	off := out.Offset()
-	typecheck.WriteExports(out, true)
-	size := out.Offset() - off
+	io.Copy(out, &old)
 	out.WriteString("\n$$\n")
+
+	io.Copy(out, &new)
 
 	if base.Debug.Export != 0 {
-		fmt.Printf("BenchmarkExportSize:%s 1 %d bytes\n", base.Ctxt.Pkgpath, size)
+		fmt.Printf("BenchmarkExportSize:%s 1 %d bytes\n", base.Ctxt.Pkgpath, oldLen)
+		if useNewExport {
+			fmt.Printf("BenchmarkNewExportSize:%s 1 %d bytes\n", base.Ctxt.Pkgpath, newLen)
+		}
 	}
 }
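As a rough illustration of how this writer hook is meant to be used, a later unified IR CL could install it along these lines. This is a hypothetical sketch, not part of this CL; the function name useUnifiedExport and the payload contents are made up, while the writeNewExportFunc variable and its func(io.Writer) signature come from the code above (it assumes package noder context, where "io" is imported):

	// Hypothetical sketch: install the experimental export writer.
	func useUnifiedExport() {
		writeNewExportFunc = func(out io.Writer) {
			// Write the experimental export payload here. The framework in
			// WriteExports takes care of the "newexportsize" header and the
			// $$B/$$ markers around the normal export data.
			io.WriteString(out, "experimental export payload")
		}
	}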

src/cmd/compile/internal/noder/import.go

@@ -31,6 +31,22 @@ import (
 	"cmd/internal/src"
 )
 
+// haveLegacyImports records whether we've imported any packages
+// without a new export data section. This is useful for experimenting
+// with new export data format designs, when you need to support
+// existing tests that manually compile files with inconsistent
+// compiler flags.
+var haveLegacyImports = false
+
+// newReadImportFunc is an extension hook for experimenting with new
+// export data formats. If a new export data payload was written out
+// for an imported package by overloading writeNewExportFunc, then
+// that payload will be mapped into memory and passed to
+// newReadImportFunc.
+var newReadImportFunc = func(data string, pkg1 *types.Pkg, check *types2.Checker, packages map[string]*types2.Package) (pkg2 *types2.Package, err error) {
+	panic("unexpected new export data payload")
+}
+
 type gcimports struct {
 	check    *types2.Checker
 	packages map[string]*types2.Package
@@ -245,7 +261,7 @@ func readImportFile(path string, target *ir.Package, check *types2.Checker, pack
 	}
 	defer f.Close()
 
-	r, end, err := findExportData(f)
+	r, end, newsize, err := findExportData(f)
 	if err != nil {
 		return
 	}
@@ -254,34 +270,51 @@ func readImportFile(path string, target *ir.Package, check *types2.Checker, pack
 		fmt.Printf("importing %s (%s)\n", path, f.Name())
 	}
 
-	var c byte
-	switch c, err = r.ReadByte(); {
-	case err != nil:
-		return
-	case c != 'i':
-		// Indexed format is distinguished by an 'i' byte,
-		// whereas previous export formats started with 'c', 'd', or 'v'.
-		err = fmt.Errorf("unexpected package format byte: %v", c)
-		return
-	}
-
-	// Map string (and data) section into memory as a single large
-	// string. This reduces heap fragmentation and allows
-	// returning individual substrings very efficiently.
-	pos := r.Offset()
-	data, err := base.MapFile(r.File(), pos, end-pos)
-	if err != nil {
-		return
-	}
-
-	typecheck.ReadImports(pkg1, data)
-	if packages != nil {
-		pkg2, err = importer.ImportData(packages, data, path)
-		if err != nil {
-			return
-		}
-	}
+	if newsize != 0 {
+		// We have unified IR data. Map it, and feed to the importers.
+		end -= newsize
+		var data string
+		data, err = base.MapFile(r.File(), end, newsize)
+		if err != nil {
+			return
+		}
+
+		pkg2, err = newReadImportFunc(data, pkg1, check, packages)
+	} else {
+		// We only have old data. Oh well, fall back to the legacy importers.
+		haveLegacyImports = true
+
+		var c byte
+		switch c, err = r.ReadByte(); {
+		case err != nil:
+			return
+		case c != 'i':
+			// Indexed format is distinguished by an 'i' byte,
+			// whereas previous export formats started with 'c', 'd', or 'v'.
+			err = fmt.Errorf("unexpected package format byte: %v", c)
+			return
+		}
+
+		pos := r.Offset()
+
+		// Map string (and data) section into memory as a single large
+		// string. This reduces heap fragmentation and allows
+		// returning individual substrings very efficiently.
+		var data string
+		data, err = base.MapFile(r.File(), pos, end-pos)
+		if err != nil {
+			return
+		}
+
+		typecheck.ReadImports(pkg1, data)
+		if packages != nil {
+			pkg2, err = importer.ImportData(packages, data, path)
+			if err != nil {
+				return
+			}
+		}
+	}
 
 	err = addFingerprint(path, f, end)
@@ -291,7 +324,7 @@ func readImportFile(path string, target *ir.Package, check *types2.Checker, pack
 // findExportData returns a *bio.Reader positioned at the start of the
 // binary export data section, and a file offset for where to stop
 // reading.
-func findExportData(f *os.File) (r *bio.Reader, end int64, err error) {
+func findExportData(f *os.File) (r *bio.Reader, end, newsize int64, err error) {
 	r = bio.NewReader(f)
 
 	// check object header
@@ -334,6 +367,14 @@ func findExportData(f *os.File) (r *bio.Reader, end int64, err error) {
 
 	// process header lines
 	for !strings.HasPrefix(line, "$$") {
+		if strings.HasPrefix(line, "newexportsize ") {
+			fields := strings.Fields(line)
+			newsize, err = strconv.ParseInt(fields[1], 10, 64)
+			if err != nil {
+				return
+			}
+		}
+
 		line, err = r.ReadString('\n')
 		if err != nil {
 			return
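Mirroring the writer side, a later unified IR CL would be expected to replace newReadImportFunc so that the mapped payload is actually decoded instead of hitting the default panic. A hypothetical sketch, assuming package noder context (where fmt, types, and types2 are imported); useUnifiedImport and the error text are made up, while the variable and its signature come from the diff above:

	// Hypothetical sketch: install the experimental import reader.
	func useUnifiedImport() {
		newReadImportFunc = func(data string, pkg1 *types.Pkg, check *types2.Checker, packages map[string]*types2.Package) (pkg2 *types2.Package, err error) {
			// "data" holds the newexportsize bytes that readImportFile mapped
			// from the end of the export section; decode it here and populate
			// pkg1 (and pkg2, when packages != nil).
			return nil, fmt.Errorf("unified IR import of %d bytes not implemented yet", len(data))
		}
	}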