1
0
mirror of https://github.com/golang/go synced 2024-07-05 09:50:19 +00:00

cmd/go: add per-package indexing for modules outside mod cache

Packages outside the module cache including the standard library will be
indexed individually rather than as a whole module.

For #52876

Change-Id: I142dad6a790e9e8eb4dc6430a588fbfa86552e49
Reviewed-on: https://go-review.googlesource.com/c/go/+/413815
Reviewed-by: Michael Matloob <matloob@golang.org>
Run-TryBot: Michael Matloob <matloob@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Michael Matloob 2022-06-23 15:46:29 -04:00
parent b9c4d94fdb
commit 5a1c5b8ae7
8 changed files with 244 additions and 71 deletions

View File

@ -878,8 +878,8 @@ func loadPackageData(ctx context.Context, path, parentPath, parentDir, parentRoo
buildMode = build.ImportComment
}
if modroot := modload.PackageModRoot(ctx, r.path); modroot != "" {
if mi, err := modindex.Get(modroot); err == nil {
data.p, data.err = mi.Import(cfg.BuildContext, mi.RelPath(r.dir), buildMode)
if rp, err := modindex.GetPackage(modroot, r.dir); err == nil {
data.p, data.err = rp.Import(cfg.BuildContext, buildMode)
goto Happy
} else if !errors.Is(err, modindex.ErrNotIndexed) {
base.Fatalf("go: %v", err)

View File

@ -7,6 +7,8 @@ Strings are written into the string table at the end of the file. Each string
is null-terminated. String offsets are relative to the start of the string table.
Bools are written as uint32s: 0 for false and 1 for true.
The following is the format for a full module:
“go index v0\n”
str uint32 - offset of string table
n uint32 - number of packages
@ -40,7 +42,16 @@ for each RawPackage:
position - file, offset, line, column - uint32
[string table]
// parseError struct
The following is the format for a single indexed package:
“go index v0\n”
str uint32 - offset of string table
for the single RawPackage:
[same RawPackage format as above]
[string table]
The following is the definition of the json-serialized parseError struct:
type parseError struct {
ErrorList *scanner.ErrorList // non-nil if the error was an ErrorList, nil otherwise
ErrorString string // non-empty for all other cases

View File

@ -23,11 +23,13 @@ import (
"sort"
"strings"
"sync"
"time"
"unsafe"
"cmd/go/internal/base"
"cmd/go/internal/cache"
"cmd/go/internal/cfg"
"cmd/go/internal/fsys"
"cmd/go/internal/imports"
"cmd/go/internal/par"
"cmd/go/internal/str"
@ -39,20 +41,16 @@ import (
// module index.
var enabled bool = godebug.Get("goindex") != "0"
// ModuleIndex represents and encoded module index file. It is used to
// Module represents and encoded module index file. It is used to
// do the equivalent of build.Import of packages in the module and answer other
// questions based on the index file's data.
type ModuleIndex struct {
type Module struct {
modroot string
od offsetDecoder
packages map[string]int // offsets of each package
packagePaths []string // paths to package directories relative to modroot; these are the keys of packages
}
var fcache par.Cache
var salt = godebug.Get("goindexsalt")
// moduleHash returns an ActionID corresponding to the state of the module
// located at filesystem path modroot.
func moduleHash(modroot string, ismodcache bool) (cache.ActionID, error) {
@ -75,7 +73,45 @@ func moduleHash(modroot string, ismodcache bool) (cache.ActionID, error) {
}
h := cache.NewHash("moduleIndex")
fmt.Fprintf(h, "module index %s %s %s %v\n", runtime.Version(), salt, indexVersion, modroot)
fmt.Fprintf(h, "module index %s %s %v\n", runtime.Version(), indexVersion, modroot)
return h.Sum(), nil
}
const modTimeCutoff = 2 * time.Second
// dirHash returns an ActionID corresponding to the state of the package
// located at filesystem path pkgdir.
func dirHash(pkgdir string) (cache.ActionID, error) {
h := cache.NewHash("moduleIndex")
fmt.Fprintf(h, "package %s %s %v\n", runtime.Version(), indexVersion, pkgdir)
entries, err := fsys.ReadDir(pkgdir)
if err != nil {
// pkgdir might not be a directory. give up on hashing.
return cache.ActionID{}, ErrNotIndexed
}
cutoff := time.Now().Add(-modTimeCutoff)
for _, info := range entries {
if info.IsDir() {
continue
}
if !info.Mode().IsRegular() {
return cache.ActionID{}, ErrNotIndexed
}
// To avoid problems for very recent files where a new
// write might not change the mtime due to file system
// mtime precision, reject caching if a file was read that
// is less than modTimeCutoff old.
//
// This is the same strategy used for hashing test inputs.
// See hashOpen in cmd/go/internal/test/test.go for the
// corresponding code.
if info.ModTime().After(cutoff) {
return cache.ActionID{}, ErrNotIndexed
}
fmt.Fprintf(h, "file %v %v %v\n", info.Name(), info.ModTime(), info.Size())
}
return h.Sum(), nil
}
@ -83,31 +119,61 @@ var modrootCache par.Cache
var ErrNotIndexed = errors.New("not in module index")
// Get returns the ModuleIndex for the module rooted at modroot.
var (
errDisabled = fmt.Errorf("%w: module indexing disabled", ErrNotIndexed)
errNotFromModuleCache = fmt.Errorf("%w: not from module cache", ErrNotIndexed)
)
// GetPackage returns the IndexPackage for the package at the given path.
// It will return ErrNotIndexed if the directory should be read without
// using the index, for instance because the index is disabled, or the packgae
// is not in a module.
func Get(modroot string) (*ModuleIndex, error) {
if !enabled || cache.DefaultDir() == "off" || cfg.BuildMod == "vendor" {
return nil, ErrNotIndexed
func GetPackage(modroot, pkgdir string) (*IndexPackage, error) {
mi, err := GetModule(modroot)
if err == nil {
return mi.Package(relPath(pkgdir, modroot)), nil
}
if modroot == "" {
panic("modindex.Get called with empty modroot")
if !errors.Is(err, errNotFromModuleCache) {
return nil, err
}
modroot = filepath.Clean(modroot)
isModCache := str.HasFilePathPrefix(modroot, cfg.GOMODCACHE)
return openIndex(modroot, isModCache)
return openIndexPackage(modroot, pkgdir)
}
// openIndex returns the module index for modPath.
// GetModule returns the Module for the given modroot.
// It will return ErrNotIndexed if the directory should be read without
// using the index, for instance because the index is disabled, or the packgae
// is not in a module.
func GetModule(modroot string) (*Module, error) {
if !enabled || cache.DefaultDir() == "off" {
return nil, errDisabled
}
if modroot == "" {
panic("modindex.GetPackage called with empty modroot")
}
if cfg.BuildMod == "vendor" {
// Even if the main module is in the module cache,
// its vendored dependencies are not loaded from their
// usual cached locations.
return nil, errNotFromModuleCache
}
modroot = filepath.Clean(modroot)
if !str.HasFilePathPrefix(modroot, cfg.GOMODCACHE) {
return nil, errNotFromModuleCache
}
return openIndexModule(modroot, true)
}
var mcache par.Cache
// openIndexModule returns the module index for modPath.
// It will return ErrNotIndexed if the module can not be read
// using the index because it contains symlinks.
func openIndex(modroot string, ismodcache bool) (*ModuleIndex, error) {
func openIndexModule(modroot string, ismodcache bool) (*Module, error) {
type result struct {
mi *ModuleIndex
mi *Module
err error
}
r := fcache.Do(modroot, func() any {
r := mcache.Do(modroot, func() any {
id, err := moduleHash(modroot, ismodcache)
if err != nil {
return result{nil, err}
@ -133,8 +199,38 @@ func openIndex(modroot string, ismodcache bool) (*ModuleIndex, error) {
return r.mi, r.err
}
// fromBytes returns a *ModuleIndex given the encoded representation.
func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) {
var pcache par.Cache
func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) {
type result struct {
pkg *IndexPackage
err error
}
r := pcache.Do(pkgdir, func() any {
id, err := dirHash(pkgdir)
if err != nil {
return result{nil, err}
}
data, _, err := cache.Default().GetMmap(id)
if err != nil {
// Couldn't read from index. Assume we couldn't read from
// the index because the package hasn't been indexed yet.
data = indexPackage(modroot, pkgdir)
if err = cache.Default().PutBytes(id, data); err != nil {
return result{nil, err}
}
}
pkg, err := packageFromBytes(modroot, data)
if err != nil {
return result{nil, err}
}
return result{pkg, nil}
}).(result)
return r.pkg, r.err
}
// fromBytes returns a *Module given the encoded representation.
func fromBytes(moddir string, data []byte) (mi *Module, err error) {
if !enabled {
panic("use of index")
}
@ -184,7 +280,7 @@ func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) {
packages[packagePaths[i]] = packageOffsets[i]
}
return &ModuleIndex{
return &Module{
moddir,
offsetDecoder{data, st},
packages,
@ -192,21 +288,60 @@ func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) {
}, nil
}
// packageFromBytes returns a *IndexPackage given the encoded representation.
func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) {
if !enabled {
panic("use of package index when not enabled")
}
// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
// that all its errors satisfy this interface, we'll only check for these errors so that
// we don't suppress panics that could have been produced from other sources.
type addrer interface {
Addr() uintptr
}
// set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
// in case it's mmapped (the common case).
old := debug.SetPanicOnFault(true)
defer func() {
debug.SetPanicOnFault(old)
if e := recover(); e != nil {
if _, ok := e.(addrer); ok {
// This panic was almost certainly caused by SetPanicOnFault.
err = fmt.Errorf("error reading module index: %v", e)
return
}
// The panic was likely not caused by SetPanicOnFault.
panic(e)
}
}()
gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
if string(gotVersion) != indexVersion {
return nil, fmt.Errorf("bad index version string: %q", gotVersion)
}
stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
st := newStringTable(data[stringTableOffset:])
d := &decoder{unread, st}
p = decodePackage(d, offsetDecoder{data, st})
p.modroot = modroot
return p, nil
}
// Returns a list of directory paths, relative to the modroot, for
// packages contained in the module index.
func (mi *ModuleIndex) Packages() []string {
func (mi *Module) Packages() []string {
return mi.packagePaths
}
// RelPath returns the path relative to the module's root.
func (mi *ModuleIndex) RelPath(path string) string {
return str.TrimFilePathPrefix(filepath.Clean(path), mi.modroot) // mi.modroot is already clean
// relPath returns the path relative to the module's root.
func relPath(path, modroot string) string {
return str.TrimFilePathPrefix(filepath.Clean(path), filepath.Clean(modroot))
}
// ImportPackage is the equivalent of build.Import given the information in ModuleIndex.
func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.ImportMode) (p *build.Package, err error) {
rp := mi.indexPackage(relpath)
// Import is the equivalent of build.Import given the information in Module.
func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) {
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("error reading module index: %v", e)
@ -218,7 +353,7 @@ func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.Im
p = &build.Package{}
p.ImportPath = "."
p.Dir = filepath.Join(mi.modroot, rp.dir)
p.Dir = filepath.Join(rp.modroot, rp.dir)
var pkgerr error
switch ctxt.Compiler {
@ -236,7 +371,7 @@ func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.Im
inTestdata := func(sub string) bool {
return strings.Contains(sub, "/testdata/") || strings.HasSuffix(sub, "/testdata") || str.HasPathPrefix(sub, "testdata")
}
if !inTestdata(relpath) {
if !inTestdata(rp.dir) {
// In build.go, p.Root should only be set in the non-local-import case, or in
// GOROOT or GOPATH. Since module mode only calls Import with path set to "."
// and the module index doesn't apply outside modules, the GOROOT case is
@ -248,8 +383,8 @@ func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.Im
if ctxt.GOROOT != "" && str.HasFilePathPrefix(p.Dir, cfg.GOROOTsrc) && p.Dir != cfg.GOROOTsrc {
p.Root = ctxt.GOROOT
p.Goroot = true
modprefix := str.TrimFilePathPrefix(mi.modroot, cfg.GOROOTsrc)
p.ImportPath = relpath
modprefix := str.TrimFilePathPrefix(rp.modroot, cfg.GOROOTsrc)
p.ImportPath = rp.dir
if modprefix != "" {
p.ImportPath = filepath.Join(modprefix, p.ImportPath)
}
@ -521,20 +656,21 @@ func IsStandardPackage(goroot_, compiler, path string) bool {
reldir = str.TrimFilePathPrefix(reldir, "cmd")
modroot = filepath.Join(modroot, "cmd")
}
mod, err := Get(modroot)
if err != nil {
if _, err := GetPackage(modroot, filepath.Join(modroot, reldir)); err == nil {
// Note that goroot.IsStandardPackage doesn't check that the directory
// actually contains any go files-- merely that it exists. GetPackage
// returning a nil error is enough for us to know the directory exists.
return true
} else if errors.Is(err, ErrNotIndexed) {
// Fall back because package isn't indexable. (Probably because
// a file was modified recently)
return goroot.IsStandardPackage(goroot_, compiler, path)
}
pkgs := mod.Packages()
i := sort.SearchStrings(pkgs, reldir)
return i != len(pkgs) && pkgs[i] == reldir
return false
}
// IsDirWithGoFiles is the equivalent of fsys.IsDirWithGoFiles using the information in the index.
func (mi *ModuleIndex) IsDirWithGoFiles(relpath string) (_ bool, err error) {
rp := mi.indexPackage(relpath)
func (rp *IndexPackage) IsDirWithGoFiles() (_ bool, err error) {
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("error reading module index: %v", e)
@ -549,9 +685,7 @@ func (mi *ModuleIndex) IsDirWithGoFiles(relpath string) (_ bool, err error) {
}
// ScanDir implements imports.ScanDir using the information in the index.
func (mi *ModuleIndex) ScanDir(path string, tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) {
rp := mi.indexPackage(path)
func (rp *IndexPackage) ScanDir(tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) {
// TODO(matloob) dir should eventually be relative to indexed directory
// TODO(matloob): skip reading raw package and jump straight to data we need?
@ -639,20 +773,22 @@ func shouldBuild(sf *sourceFile, tags map[string]bool) bool {
return true
}
// index package holds the information needed to access information in the
// index about a package.
type indexPackage struct {
// IndexPackage holds the information needed to access information in the
// index needed to load a package in a specific directory.
type IndexPackage struct {
error error
dir string // directory of the package relative to the modroot
modroot string
// Source files
sourceFiles []*sourceFile
}
var errCannotFindPackage = errors.New("cannot find package")
// indexPackage returns an indexPackage constructed using the information in the ModuleIndex.
func (mi *ModuleIndex) indexPackage(path string) *indexPackage {
// Package returns an IndexPackage constructed using the information in the Module.
func (mi *Module) Package(path string) *IndexPackage {
defer func() {
if e := recover(); e != nil {
base.Fatalf("error reading module index: %v", e)
@ -660,12 +796,18 @@ func (mi *ModuleIndex) indexPackage(path string) *indexPackage {
}()
offset, ok := mi.packages[path]
if !ok {
return &indexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
}
// TODO(matloob): do we want to lock on the module index?
d := mi.od.decoderAt(offset)
rp := new(indexPackage)
p := decodePackage(d, mi.od)
p.modroot = mi.modroot
return p
}
func decodePackage(d *decoder, od offsetDecoder) *IndexPackage {
rp := new(IndexPackage)
if errstr := d.string(); errstr != "" {
rp.error = errors.New(errstr)
}
@ -675,7 +817,7 @@ func (mi *ModuleIndex) indexPackage(path string) *indexPackage {
for i := uint32(0); i < numSourceFiles; i++ {
offset := d.uint32()
rp.sourceFiles[i] = &sourceFile{
od: mi.od.offsetDecoderAt(offset),
od: od.offsetDecoderAt(offset),
}
}
return rp

View File

@ -65,7 +65,15 @@ func indexModule(modroot string) ([]byte, error) {
if err != nil {
return nil, err
}
return encodeModule(packages), nil
return encodeModuleBytes(packages), nil
}
// indexModule indexes the package at the given directory and returns its
// encoded representation. It returns ErrNotIndexed if the package can't
// be indexed.
func indexPackage(modroot, pkgdir string) []byte {
p := importRaw(modroot, relPath(pkgdir, modroot))
return encodePackageBytes(p)
}
// rawPackage holds the information from each package that's needed to

View File

@ -11,9 +11,9 @@ import (
const indexVersion = "go index v0"
// encodeModule produces the encoded representation of the module index.
// encodeModule may modify the packages slice.
func encodeModule(packages []*rawPackage) []byte {
// encodeModuleBytes produces the encoded representation of the module index.
// encodeModuleBytes may modify the packages slice.
func encodeModuleBytes(packages []*rawPackage) []byte {
e := newEncoder()
e.Bytes([]byte(indexVersion))
e.Bytes([]byte{'\n'})
@ -39,6 +39,18 @@ func encodeModule(packages []*rawPackage) []byte {
return e.b
}
func encodePackageBytes(p *rawPackage) []byte {
e := newEncoder()
e.Bytes([]byte(indexVersion))
e.Bytes([]byte{'\n'})
stringTableOffsetPos := e.Pos() // fill this at the end
e.Uint32(0) // string table offset
encodePackage(e, p)
e.IntAt(e.Pos(), stringTableOffsetPos)
e.Bytes(e.stringTable)
return e.b
}
func encodePackage(e *encoder, p *rawPackage) {
e.String(p.error)
e.String(p.dir)

View File

@ -657,11 +657,11 @@ func dirInModule(path, mpath, mdir string, isLocal bool) (dir string, haveGoFile
// We don't care about build tags, not even "+build ignore".
// We're just looking for a plausible directory.
res := haveGoFilesCache.Do(dir, func() any {
// modindex.Get will return ErrNotIndexed for any directories which
// modindex.GetPackage will return ErrNotIndexed for any directories which
// are reached through a symlink, so that they will be handled by
// fsys.IsDirWithGoFiles below.
if mi, err := modindex.Get(mdir); err == nil {
isDirWithGoFiles, err := mi.IsDirWithGoFiles(mi.RelPath(dir))
if ip, err := modindex.GetPackage(mdir, dir); err == nil {
isDirWithGoFiles, err := ip.IsDirWithGoFiles()
return goFilesEntry{isDirWithGoFiles, err}
} else if !errors.Is(err, modindex.ErrNotIndexed) {
return goFilesEntry{err: err}

View File

@ -2102,8 +2102,8 @@ func (ld *loader) checkTidyCompatibility(ctx context.Context, rs *Requirements)
// may see these legacy imports. We drop them so that the module
// search does not look for modules to try to satisfy them.
func scanDir(modroot string, dir string, tags map[string]bool) (imports_, testImports []string, err error) {
if mi, mierr := modindex.Get(modroot); mierr == nil {
imports_, testImports, err = mi.ScanDir(mi.RelPath(dir), tags)
if ip, mierr := modindex.GetPackage(modroot, dir); mierr == nil {
imports_, testImports, err = ip.ScanDir(tags)
goto Happy
} else if !errors.Is(mierr, modindex.ErrNotIndexed) {
return nil, nil, mierr

View File

@ -195,7 +195,7 @@ func matchPackages(ctx context.Context, m *search.Match, tags map[string]bool, f
}
modPrefix = mod.Path
}
if mi, err := modindex.Get(root); err == nil {
if mi, err := modindex.GetModule(root); err == nil {
walkFromIndex(mi, modPrefix, isMatch, treeCanMatch, tags, have, addPkg)
continue
} else if !errors.Is(err, modindex.ErrNotIndexed) {
@ -213,9 +213,9 @@ func matchPackages(ctx context.Context, m *search.Match, tags map[string]bool, f
}
// walkFromIndex matches packages in a module using the module index. modroot
// is the module's root directory on disk, index is the ModuleIndex for the
// is the module's root directory on disk, index is the modindex.Module for the
// module, and importPathRoot is the module's path prefix.
func walkFromIndex(index *modindex.ModuleIndex, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
loopPackages:
for _, reldir := range index.Packages() {
// Avoid .foo, _foo, and testdata subdirectory trees.
@ -252,7 +252,7 @@ loopPackages:
if !have[name] {
have[name] = true
if isMatch(name) {
if _, _, err := index.ScanDir(reldir, tags); err != imports.ErrNoGo {
if _, _, err := index.Package(reldir).ScanDir(tags); err != imports.ErrNoGo {
addPkg(name)
}
}