cmd/go: make module index loading O(1)

For a large module, opening the index was populating tables with
entries for every package in the module. If we are only using a small
number of those packages, this is wasted work that can dwarf the
benefit from the index.

This CL changes the index reader to avoid loading all packages
at module index open time. It also refactors the code somewhat
for clarity.

It also removes some duplication by defining that a per-package
index is a per-module index containing a single package, rather
than having two different formats and two different decoders.

It also changes the string table to use uvarint-prefixed data
instead of having to scan for a NUL byte. This makes random access
to long strings more efficient - O(1) instead of O(n) - and can significantly
speed up the strings.Compare operation in the binary search looking
for a given package.

Also add a direct test of the indexing code.

For #53577.

Change-Id: I7428d28133e4e7fe2d2993fa014896cd15af48af
Reviewed-on: https://go-review.googlesource.com/c/go/+/416178
Reviewed-by: Bryan Mills <bcmills@google.com>
This commit is contained in:
Russ Cox 2022-07-06 09:49:32 -04:00
parent b8bf820d5d
commit 7510e597de
4 changed files with 356 additions and 304 deletions

View file

@ -0,0 +1,87 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modindex
import (
"encoding/hex"
"encoding/json"
"go/build"
"internal/diff"
"path/filepath"
"reflect"
"runtime"
"testing"
)
func init() {
isTest = true
enabled = true // to allow GODEBUG=goindex=0 go test, when things are very broken
}
// TestIndex round-trips packages under GOROOT/src through the index
// encoder and decoder and checks that the decoded result is identical
// to what build.Default.Import produces directly from disk.
func TestIndex(t *testing.T) {
	src := filepath.Join(runtime.GOROOT(), "src")

	// verify decodes pkg from the module index m and compares it against
	// a direct build.Default.Import of the same directory, dumping the
	// raw index bytes and a JSON diff on mismatch.
	verify := func(t *testing.T, m *Module, pkg string, data []byte) {
		p := m.Package(pkg)
		fromIndex, err := p.Import(build.Default, build.ImportComment)
		if err != nil {
			t.Fatal(err)
		}
		fromBuild, err := build.Default.Import(pkg, filepath.Join(src, pkg), build.ImportComment)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(fromIndex, fromBuild) {
			t.Errorf("mismatch")
			t.Logf("index:\n%s", hex.Dump(data))

			indexJSON, err := json.MarshalIndent(fromIndex, "", "\t")
			if err != nil {
				t.Fatal(err)
			}
			buildJSON, err := json.MarshalIndent(fromBuild, "", "\t")
			if err != nil {
				t.Fatal(err)
			}
			t.Logf("diff:\n%s", diff.Diff("index", indexJSON, "correct", buildJSON))
			t.FailNow()
		}
	}

	// Packages of increasing complexity, each checked on its own first.
	pkgs := []string{
		"crypto",
		"encoding",
		"unsafe",
		"encoding/json",
		"runtime",
		"net",
	}

	var allRaw []*rawPackage
	for _, pkg := range pkgs {
		rp := importRaw(src, pkg)
		allRaw = append(allRaw, rp)
		t.Run(pkg, func(t *testing.T) {
			data := encodeModuleBytes([]*rawPackage{rp})
			m, err := fromBytes(src, data)
			if err != nil {
				t.Fatal(err)
			}
			verify(t, m, pkg, data)
		})
	}

	// A single index holding every package at once must work as well.
	t.Run("all", func(t *testing.T) {
		data := encodeModuleBytes(allRaw)
		m, err := fromBytes(src, data)
		if err != nil {
			t.Fatal(err)
		}
		for _, pkg := range pkgs {
			verify(t, m, pkg, data)
		}
	})
}

View file

@ -15,7 +15,6 @@ import (
"internal/godebug" "internal/godebug"
"internal/goroot" "internal/goroot"
"internal/unsafeheader" "internal/unsafeheader"
"math"
"path" "path"
"path/filepath" "path/filepath"
"runtime" "runtime"
@ -45,10 +44,9 @@ var enabled bool = godebug.Get("goindex") != "0"
// do the equivalent of build.Import of packages in the module and answer other // do the equivalent of build.Import of packages in the module and answer other
// questions based on the index file's data. // questions based on the index file's data.
type Module struct { type Module struct {
modroot string modroot string
od offsetDecoder d *decoder
packages map[string]int // offsets of each package n int // number of packages
packagePaths []string // paths to package directories relative to modroot; these are the keys of packages
} }
// moduleHash returns an ActionID corresponding to the state of the module // moduleHash returns an ActionID corresponding to the state of the module
@ -236,110 +234,131 @@ func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) {
return r.pkg, r.err return r.pkg, r.err
} }
// errCorrupt is panicked (and later converted to an error by unprotect)
// whenever decoding detects an impossible offset or length in the index.
var errCorrupt = errors.New("corrupt index")

// protect marks the start of a large section of code that accesses the index.
// It should be used as:
//
//	defer unprotect(protect(), &err)
//
// It should not be used for trivial accesses which would be
// dwarfed by the overhead of the defer.
//
// protect enables panic-on-fault so that faults from reading the index
// data (commonly mmapped) become recoverable panics rather than crashes;
// it returns the previous setting so unprotect can restore it.
func protect() bool {
	return debug.SetPanicOnFault(true)
}
// isTest is set by the modindex tests so that unprotect panics with the
// wrapped error (visible to the test) instead of calling base.Fatalf.
var isTest = false

// unprotect marks the end of a large section of code that accesses the index.
// It should be used as:
//
//	defer unprotect(protect(), &err)
//
// unprotect looks for panics due to errCorrupt or bad mmap accesses.
// When it finds them, it adds explanatory text, consumes the panic, and sets *errp instead.
// If errp is nil, unprotect adds the explanatory text but then calls base.Fatalf.
func unprotect(old bool, errp *error) {
	// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
	// that all its errors satisfy this interface, we'll only check for these errors so that
	// we don't suppress panics that could have been produced from other sources.
	type addrer interface {
		Addr() uintptr
	}

	debug.SetPanicOnFault(old)

	if e := recover(); e != nil {
		if _, ok := e.(addrer); ok || e == errCorrupt {
			// This panic was almost certainly caused by SetPanicOnFault or our panic(errCorrupt).
			err := fmt.Errorf("error reading module index: %v", e)
			if errp != nil {
				*errp = err
				return
			}
			if isTest {
				panic(err)
			}
			base.Fatalf("%v", err)
		}
		// The panic was likely not caused by SetPanicOnFault.
		panic(e)
	}
}
// fromBytes returns a *Module given the encoded representation. // fromBytes returns a *Module given the encoded representation.
func fromBytes(moddir string, data []byte) (mi *Module, err error) { func fromBytes(moddir string, data []byte) (m *Module, err error) {
if !enabled { if !enabled {
panic("use of index") panic("use of index")
} }
// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed defer unprotect(protect(), &err)
// that all its errors satisfy this interface, we'll only check for these errors so that
// we don't suppress panics that could have been produced from other sources. if !bytes.HasPrefix(data, []byte(indexVersion+"\n")) {
type addrer interface { return nil, errCorrupt
Addr() uintptr
} }
// set PanicOnFault to true so that we can catch errors on the initial reads of the slice, const hdr = len(indexVersion + "\n")
// in case it's mmapped (the common case). d := &decoder{data: data}
old := debug.SetPanicOnFault(true) str := d.intAt(hdr)
defer func() { if str < hdr+8 || len(d.data) < str {
debug.SetPanicOnFault(old) return nil, errCorrupt
if e := recover(); e != nil {
if _, ok := e.(addrer); ok {
// This panic was almost certainly caused by SetPanicOnFault.
err = fmt.Errorf("error reading module index: %v", e)
return
}
// The panic was likely not caused by SetPanicOnFault.
panic(e)
}
}()
gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
if string(gotVersion) != indexVersion {
return nil, fmt.Errorf("bad index version string: %q", gotVersion)
} }
stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:] d.data, d.str = data[:str], d.data[str:]
st := newStringTable(data[stringTableOffset:]) // Check that string table looks valid.
d := decoder{unread, st} // First string is empty string (length 0),
numPackages := d.int() // and we leave a marker byte 0xFF at the end
// just to make sure that the file is not truncated.
packagePaths := make([]string, numPackages) if len(d.str) == 0 || d.str[0] != 0 || d.str[len(d.str)-1] != 0xFF {
for i := range packagePaths { return nil, errCorrupt
packagePaths[i] = d.string()
}
packageOffsets := make([]int, numPackages)
for i := range packageOffsets {
packageOffsets[i] = d.int()
}
packages := make(map[string]int, numPackages)
for i := range packagePaths {
packages[packagePaths[i]] = packageOffsets[i]
} }
return &Module{ n := d.intAt(hdr + 4)
if n < 0 || n > (len(d.data)-8)/8 {
return nil, errCorrupt
}
m = &Module{
moddir, moddir,
offsetDecoder{data, st}, d,
packages, n,
packagePaths, }
}, nil return m, nil
} }
// packageFromBytes returns a *IndexPackage given the encoded representation. // packageFromBytes returns a *IndexPackage given the encoded representation.
func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) { func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) {
if !enabled { m, err := fromBytes(modroot, data)
panic("use of package index when not enabled") if err != nil {
return nil, err
} }
if m.n != 1 {
// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed return nil, fmt.Errorf("corrupt single-package index")
// that all its errors satisfy this interface, we'll only check for these errors so that
// we don't suppress panics that could have been produced from other sources.
type addrer interface {
Addr() uintptr
} }
return m.pkg(0), nil
// set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
// in case it's mmapped (the common case).
old := debug.SetPanicOnFault(true)
defer func() {
debug.SetPanicOnFault(old)
if e := recover(); e != nil {
if _, ok := e.(addrer); ok {
// This panic was almost certainly caused by SetPanicOnFault.
err = fmt.Errorf("error reading module index: %v", e)
return
}
// The panic was likely not caused by SetPanicOnFault.
panic(e)
}
}()
gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
if string(gotVersion) != indexVersion {
return nil, fmt.Errorf("bad index version string: %q", gotVersion)
}
stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
st := newStringTable(data[stringTableOffset:])
d := &decoder{unread, st}
p = decodePackage(d, offsetDecoder{data, st})
p.modroot = modroot
return p, nil
} }
// Returns a list of directory paths, relative to the modroot, for // pkgDir returns the dir string of the i'th package in the index.
// packages contained in the module index. func (m *Module) pkgDir(i int) string {
func (mi *Module) Packages() []string { if i < 0 || i >= m.n {
return mi.packagePaths panic(errCorrupt)
}
return m.d.stringAt(12 + 8 + 8*i)
}
// pkgOff returns the offset within the index data of the encoded
// representation of the i'th package. It panics with errCorrupt for an
// out-of-range index.
func (m *Module) pkgOff(i int) int {
	if i < 0 || i >= m.n {
		panic(errCorrupt)
	}
	// NOTE(review): base offset 12+8 matches pkgDir; each table entry is
	// two uint32s (dir string, package offset) and the offset is the
	// second word — confirm layout against the encoder.
	const tableBase = 12 + 8
	return m.d.intAt(tableBase + 8*i + 4)
}
// Walk calls f for each package in the index, passing the path to that package relative to the module root.
func (m *Module) Walk(f func(path string)) {
defer unprotect(protect(), nil)
for i := 0; i < m.n; i++ {
f(m.pkgDir(i))
}
} }
// relPath returns the path relative to the module's root. // relPath returns the path relative to the module's root.
@ -349,11 +368,7 @@ func relPath(path, modroot string) string {
// Import is the equivalent of build.Import given the information in Module. // Import is the equivalent of build.Import given the information in Module.
func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) { func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) {
defer func() { defer unprotect(protect(), &err)
if e := recover(); e != nil {
err = fmt.Errorf("error reading module index: %v", e)
}
}()
ctxt := (*Context)(&bctxt) ctxt := (*Context)(&bctxt)
@ -794,46 +809,44 @@ type IndexPackage struct {
var errCannotFindPackage = errors.New("cannot find package") var errCannotFindPackage = errors.New("cannot find package")
// Package returns an IndexPackage constructed using the information in the Module. // Package finds the package with the given path (relative to the module root).
func (mi *Module) Package(path string) *IndexPackage { // If the package does not exist, Package returns an IndexPackage that will return an
defer func() { // appropriate error from its methods.
if e := recover(); e != nil { func (m *Module) Package(path string) *IndexPackage {
base.Fatalf("error reading module index: %v", e) defer unprotect(protect(), nil)
}
}()
offset, ok := mi.packages[path]
if !ok {
return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
}
// TODO(matloob): do we want to lock on the module index? i, ok := sort.Find(m.n, func(i int) int {
d := mi.od.decoderAt(offset) return strings.Compare(path, m.pkgDir(i))
p := decodePackage(d, mi.od) })
p.modroot = mi.modroot if !ok {
return p return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(m.modroot, path))}
}
return m.pkg(i)
} }
func decodePackage(d *decoder, od offsetDecoder) *IndexPackage { // pkg returns the i'th IndexPackage in m.
rp := new(IndexPackage) func (m *Module) pkg(i int) *IndexPackage {
if errstr := d.string(); errstr != "" { r := m.d.readAt(m.pkgOff(i))
rp.error = errors.New(errstr) p := new(IndexPackage)
if errstr := r.string(); errstr != "" {
p.error = errors.New(errstr)
} }
rp.dir = d.string() p.dir = r.string()
numSourceFiles := d.uint32() p.sourceFiles = make([]*sourceFile, r.int())
rp.sourceFiles = make([]*sourceFile, numSourceFiles) for i := range p.sourceFiles {
for i := uint32(0); i < numSourceFiles; i++ { p.sourceFiles[i] = &sourceFile{
offset := d.uint32() d: m.d,
rp.sourceFiles[i] = &sourceFile{ pos: r.int(),
od: od.offsetDecoderAt(offset),
} }
} }
return rp p.modroot = m.modroot
return p
} }
// sourceFile represents the information of a given source file in the module index. // sourceFile represents the information of a given source file in the module index.
type sourceFile struct { type sourceFile struct {
od offsetDecoder // od interprets all offsets relative to the start of the source file's data d *decoder // encoding of this source file
pos int // start of sourceFile encoding in d
onceReadImports sync.Once onceReadImports sync.Once
savedImports []rawImport // saved imports so that they're only read once savedImports []rawImport // saved imports so that they're only read once
} }
@ -853,73 +866,67 @@ const (
) )
func (sf *sourceFile) error() string { func (sf *sourceFile) error() string {
return sf.od.stringAt(sourceFileError) return sf.d.stringAt(sf.pos + sourceFileError)
} }
func (sf *sourceFile) parseError() string { func (sf *sourceFile) parseError() string {
return sf.od.stringAt(sourceFileParseError) return sf.d.stringAt(sf.pos + sourceFileParseError)
} }
func (sf *sourceFile) synopsis() string { func (sf *sourceFile) synopsis() string {
return sf.od.stringAt(sourceFileSynopsis) return sf.d.stringAt(sf.pos + sourceFileSynopsis)
} }
func (sf *sourceFile) name() string { func (sf *sourceFile) name() string {
return sf.od.stringAt(sourceFileName) return sf.d.stringAt(sf.pos + sourceFileName)
} }
func (sf *sourceFile) pkgName() string { func (sf *sourceFile) pkgName() string {
return sf.od.stringAt(sourceFilePkgName) return sf.d.stringAt(sf.pos + sourceFilePkgName)
} }
func (sf *sourceFile) ignoreFile() bool { func (sf *sourceFile) ignoreFile() bool {
return sf.od.boolAt(sourceFileIgnoreFile) return sf.d.boolAt(sf.pos + sourceFileIgnoreFile)
} }
func (sf *sourceFile) binaryOnly() bool { func (sf *sourceFile) binaryOnly() bool {
return sf.od.boolAt(sourceFileBinaryOnly) return sf.d.boolAt(sf.pos + sourceFileBinaryOnly)
} }
func (sf *sourceFile) cgoDirectives() string { func (sf *sourceFile) cgoDirectives() string {
return sf.od.stringAt(sourceFileCgoDirectives) return sf.d.stringAt(sf.pos + sourceFileCgoDirectives)
} }
func (sf *sourceFile) goBuildConstraint() string { func (sf *sourceFile) goBuildConstraint() string {
return sf.od.stringAt(sourceFileGoBuildConstraint) return sf.d.stringAt(sf.pos + sourceFileGoBuildConstraint)
} }
func (sf *sourceFile) plusBuildConstraints() []string { func (sf *sourceFile) plusBuildConstraints() []string {
d := sf.od.decoderAt(sourceFileNumPlusBuildConstraints) pos := sf.pos + sourceFileNumPlusBuildConstraints
n := d.int() n := sf.d.intAt(pos)
pos += 4
ret := make([]string, n) ret := make([]string, n)
for i := 0; i < n; i++ { for i := 0; i < n; i++ {
ret[i] = d.string() ret[i] = sf.d.stringAt(pos)
pos += 4
} }
return ret return ret
} }
func importsOffset(numPlusBuildConstraints int) int {
// 4 bytes per uint32, add one to advance past numPlusBuildConstraints itself
return sourceFileNumPlusBuildConstraints + 4*(numPlusBuildConstraints+1)
}
func (sf *sourceFile) importsOffset() int { func (sf *sourceFile) importsOffset() int {
numPlusBuildConstraints := sf.od.intAt(sourceFileNumPlusBuildConstraints) pos := sf.pos + sourceFileNumPlusBuildConstraints
return importsOffset(numPlusBuildConstraints) n := sf.d.intAt(pos)
} // each build constraint is 1 uint32
return pos + 4 + n*4
func embedsOffset(importsOffset, numImports int) int {
// 4 bytes per uint32; 1 to advance past numImports itself, and 5 uint32s per import
return importsOffset + 4*(1+(5*numImports))
} }
func (sf *sourceFile) embedsOffset() int { func (sf *sourceFile) embedsOffset() int {
importsOffset := sf.importsOffset() pos := sf.importsOffset()
numImports := sf.od.intAt(importsOffset) n := sf.d.intAt(pos)
return embedsOffset(importsOffset, numImports) // each import is 5 uint32s (string + tokpos)
return pos + 4 + n*(4*5)
} }
func (sf *sourceFile) imports() []rawImport { func (sf *sourceFile) imports() []rawImport {
sf.onceReadImports.Do(func() { sf.onceReadImports.Do(func() {
importsOffset := sf.importsOffset() importsOffset := sf.importsOffset()
d := sf.od.decoderAt(importsOffset) r := sf.d.readAt(importsOffset)
numImports := d.int() numImports := r.int()
ret := make([]rawImport, numImports) ret := make([]rawImport, numImports)
for i := 0; i < numImports; i++ { for i := 0; i < numImports; i++ {
ret[i].path = d.string() ret[i] = rawImport{r.string(), r.tokpos()}
ret[i].position = d.tokpos()
} }
sf.savedImports = ret sf.savedImports = ret
}) })
@ -928,125 +935,15 @@ func (sf *sourceFile) imports() []rawImport {
func (sf *sourceFile) embeds() []embed { func (sf *sourceFile) embeds() []embed {
embedsOffset := sf.embedsOffset() embedsOffset := sf.embedsOffset()
d := sf.od.decoderAt(embedsOffset) r := sf.d.readAt(embedsOffset)
numEmbeds := d.int() numEmbeds := r.int()
ret := make([]embed, numEmbeds) ret := make([]embed, numEmbeds)
for i := range ret { for i := range ret {
pattern := d.string() ret[i] = embed{r.string(), r.tokpos()}
pos := d.tokpos()
ret[i] = embed{pattern, pos}
} }
return ret return ret
} }
// A decoder reads from the current position of the file and advances its position as it
// reads.
type decoder struct {
b []byte
st *stringTable
}
func (d *decoder) uint32() uint32 {
n := binary.LittleEndian.Uint32(d.b[:4])
d.b = d.b[4:]
return n
}
func (d *decoder) int() int {
n := d.uint32()
if int64(n) > math.MaxInt {
base.Fatalf("go: attempting to read a uint32 from the index that overflows int")
}
return int(n)
}
func (d *decoder) tokpos() token.Position {
file := d.string()
offset := d.int()
line := d.int()
column := d.int()
return token.Position{
Filename: file,
Offset: offset,
Line: line,
Column: column,
}
}
func (d *decoder) string() string {
return d.st.string(d.int())
}
// And offset decoder reads information offset from its position in the file.
// It's either offset from the beginning of the index, or the beginning of a sourceFile's data.
type offsetDecoder struct {
b []byte
st *stringTable
}
func (od *offsetDecoder) uint32At(offset int) uint32 {
if offset > len(od.b) {
base.Fatalf("go: trying to read from index file at offset higher than file length. This indicates a corrupt offset file in the cache.")
}
return binary.LittleEndian.Uint32(od.b[offset:])
}
func (od *offsetDecoder) intAt(offset int) int {
n := od.uint32At(offset)
if int64(n) > math.MaxInt {
base.Fatalf("go: attempting to read a uint32 from the index that overflows int")
}
return int(n)
}
func (od *offsetDecoder) boolAt(offset int) bool {
switch v := od.uint32At(offset); v {
case 0:
return false
case 1:
return true
default:
base.Fatalf("go: invalid bool value in index file encoding: %v", v)
}
panic("unreachable")
}
func (od *offsetDecoder) stringAt(offset int) string {
return od.st.string(od.intAt(offset))
}
func (od *offsetDecoder) decoderAt(offset int) *decoder {
return &decoder{od.b[offset:], od.st}
}
func (od *offsetDecoder) offsetDecoderAt(offset uint32) offsetDecoder {
return offsetDecoder{od.b[offset:], od.st}
}
type stringTable struct {
b []byte
}
func newStringTable(b []byte) *stringTable {
return &stringTable{b: b}
}
func (st *stringTable) string(pos int) string {
if pos == 0 {
return ""
}
bb := st.b[pos:]
i := bytes.IndexByte(bb, 0)
if i == -1 {
panic("reached end of string table trying to read string")
}
s := asString(bb[:i])
return s
}
func asString(b []byte) string { func asString(b []byte) string {
p := (*unsafeheader.Slice)(unsafe.Pointer(&b)).Data p := (*unsafeheader.Slice)(unsafe.Pointer(&b)).Data
@ -1057,3 +954,82 @@ func asString(b []byte) string {
return s return s
} }
// A decoder helps decode the index format.
// It holds the two regions of the index file and provides random-access
// reads into them; all accesses panic with errCorrupt on bad offsets.
type decoder struct {
	data []byte // data after header, up to the start of the string table
	str  []byte // string table: uvarint-length-prefixed strings, 0xFF end marker
}
// intAt returns the little-endian uint32 stored at offset off in d.data
// as an int, panicking with errCorrupt if the offset is out of range or
// the stored value would be negative as an int32.
func (d *decoder) intAt(off int) int {
	// Written as len(d.data)-off < 4 (not off+4 > len) so the check
	// cannot overflow for huge off values.
	if off < 0 || len(d.data)-off < 4 {
		panic(errCorrupt)
	}
	u := binary.LittleEndian.Uint32(d.data[off:])
	if u&(1<<31) != 0 {
		// Sign bit set: the index never stores such values, so this
		// must be corruption.
		panic(errCorrupt)
	}
	return int(u)
}
// boolAt returns the bool at the given offset in d.data,
// decoded as "any nonzero int means true".
func (d *decoder) boolAt(off int) bool {
	v := d.intAt(off)
	return v != 0
}
// stringAt returns the string pointed at by the int at the given offset in d.data.
// (The int is an offset into the string table d.str.)
func (d *decoder) stringAt(off int) string {
	return d.stringTableAt(d.intAt(off))
}
// stringTableAt returns the string at the given offset in the string
// table d.str. Each string is stored as a uvarint length followed by
// that many bytes, so a lookup is O(1) in the string's length prefix.
// Panics with errCorrupt on any out-of-range offset or length.
func (d *decoder) stringTableAt(off int) string {
	if off < 0 || off >= len(d.str) {
		panic(errCorrupt)
	}
	rest := d.str[off:]
	size, hdr := binary.Uvarint(rest)
	if hdr <= 0 || size > uint64(len(rest)-hdr) {
		panic(errCorrupt)
	}
	return asString(rest[hdr : hdr+int(size)])
}
// A reader reads sequential fields from a section of the index format.
type reader struct {
	d   *decoder
	pos int // current offset in d.data; advanced by each read
}
// readAt returns a reader positioned at the given offset in d's data.
func (d *decoder) readAt(pos int) *reader {
	r := &reader{d: d, pos: pos}
	return r
}
// int reads the next int and advances the reader past it.
func (r *reader) int() int {
	v := r.d.intAt(r.pos)
	r.pos += 4
	return v
}
// string reads the next string, stored as an int offset into the
// string table, and advances the reader past it.
func (r *reader) string() string {
	off := r.int()
	return r.d.stringTableAt(off)
}
// bool reads the next bool (a uint32; zero is false) and advances
// the reader past it.
func (r *reader) bool() bool {
	v := r.int()
	return v != 0
}
// tokpos reads the next token.Position: a filename string followed by
// three ints (offset, line, column), in that order.
func (r *reader) tokpos() token.Position {
	var p token.Position
	p.Filename = r.string()
	p.Offset = r.int()
	p.Line = r.int()
	p.Column = r.int()
	return p
}

View file

@ -1,54 +1,46 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package modindex package modindex
import ( import (
"cmd/go/internal/base" "cmd/go/internal/base"
"encoding/binary" "encoding/binary"
"go/token" "go/token"
"math"
"sort" "sort"
"strings"
) )
const indexVersion = "go index v0" const indexVersion = "go index v1" // 11 bytes (plus \n), to align uint32s in index
// encodeModuleBytes produces the encoded representation of the module index. // encodeModuleBytes produces the encoded representation of the module index.
// encodeModuleBytes may modify the packages slice. // encodeModuleBytes may modify the packages slice.
func encodeModuleBytes(packages []*rawPackage) []byte { func encodeModuleBytes(packages []*rawPackage) []byte {
e := newEncoder() e := newEncoder()
e.Bytes([]byte(indexVersion)) e.Bytes([]byte(indexVersion + "\n"))
e.Bytes([]byte{'\n'})
stringTableOffsetPos := e.Pos() // fill this at the end stringTableOffsetPos := e.Pos() // fill this at the end
e.Uint32(0) // string table offset e.Uint32(0) // string table offset
e.Int(len(packages))
sort.Slice(packages, func(i, j int) bool { sort.Slice(packages, func(i, j int) bool {
return packages[i].dir < packages[j].dir return packages[i].dir < packages[j].dir
}) })
e.Int(len(packages))
packagesPos := e.Pos()
for _, p := range packages { for _, p := range packages {
e.String(p.dir) e.String(p.dir)
}
packagesOffsetPos := e.Pos()
for range packages {
e.Int(0) e.Int(0)
} }
for i, p := range packages { for i, p := range packages {
e.IntAt(e.Pos(), packagesOffsetPos+4*i) e.IntAt(e.Pos(), packagesPos+8*i+4)
encodePackage(e, p) encodePackage(e, p)
} }
e.IntAt(e.Pos(), stringTableOffsetPos) e.IntAt(e.Pos(), stringTableOffsetPos)
e.Bytes(e.stringTable) e.Bytes(e.stringTable)
e.Bytes([]byte{0xFF}) // end of string table marker
return e.b return e.b
} }
func encodePackageBytes(p *rawPackage) []byte { func encodePackageBytes(p *rawPackage) []byte {
e := newEncoder() return encodeModuleBytes([]*rawPackage{p})
e.Bytes([]byte(indexVersion))
e.Bytes([]byte{'\n'})
stringTableOffsetPos := e.Pos() // fill this at the end
e.Uint32(0) // string table offset
encodePackage(e, p)
e.IntAt(e.Pos(), stringTableOffsetPos)
e.Bytes(e.stringTable)
return e.b
} }
func encodePackage(e *encoder, p *rawPackage) { func encodePackage(e *encoder, p *rawPackage) {
@ -126,9 +118,6 @@ func (e *encoder) Bytes(b []byte) {
} }
func (e *encoder) String(s string) { func (e *encoder) String(s string) {
if strings.IndexByte(s, 0) >= 0 {
base.Fatalf("go: attempting to encode a string containing a null byte")
}
if n, ok := e.strings[s]; ok { if n, ok := e.strings[s]; ok {
e.Int(n) e.Int(n)
return return
@ -136,8 +125,8 @@ func (e *encoder) String(s string) {
pos := len(e.stringTable) pos := len(e.stringTable)
e.strings[s] = pos e.strings[s] = pos
e.Int(pos) e.Int(pos)
e.stringTable = binary.AppendUvarint(e.stringTable, uint64(len(s)))
e.stringTable = append(e.stringTable, []byte(s)...) e.stringTable = append(e.stringTable, []byte(s)...)
e.stringTable = append(e.stringTable, 0)
} }
func (e *encoder) Bool(b bool) { func (e *encoder) Bool(b bool) {
@ -152,17 +141,18 @@ func (e *encoder) Uint32(n uint32) {
e.b = binary.LittleEndian.AppendUint32(e.b, n) e.b = binary.LittleEndian.AppendUint32(e.b, n)
} }
// Int encodes n. Note that all ints are written to the index as uint32s. // Int encodes n. Note that all ints are written to the index as uint32s,
// and to avoid problems on 32-bit systems we require fitting into a 32-bit int.
func (e *encoder) Int(n int) { func (e *encoder) Int(n int) {
if n < 0 || int64(n) > math.MaxUint32 { if n < 0 || int(int32(n)) != n {
base.Fatalf("go: attempting to write an int to the index that overflows uint32") base.Fatalf("go: attempting to write an int to the index that overflows int32")
} }
e.Uint32(uint32(n)) e.Uint32(uint32(n))
} }
func (e *encoder) IntAt(n int, at int) { func (e *encoder) IntAt(n int, at int) {
if n < 0 || int64(n) > math.MaxUint32 { if n < 0 || int(int32(n)) != n {
base.Fatalf("go: attempting to write an int to the index that overflows uint32") base.Fatalf("go: attempting to write an int to the index that overflows int32")
} }
binary.LittleEndian.PutUint32(e.b[at:], uint32(n)) binary.LittleEndian.PutUint32(e.b[at:], uint32(n))
} }

View file

@ -216,21 +216,20 @@ func matchPackages(ctx context.Context, m *search.Match, tags map[string]bool, f
// is the module's root directory on disk, index is the modindex.Module for the // is the module's root directory on disk, index is the modindex.Module for the
// module, and importPathRoot is the module's path prefix. // module, and importPathRoot is the module's path prefix.
func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) { func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
loopPackages: index.Walk(func(reldir string) {
for _, reldir := range index.Packages() {
// Avoid .foo, _foo, and testdata subdirectory trees. // Avoid .foo, _foo, and testdata subdirectory trees.
p := reldir p := reldir
for { for {
elem, rest, found := strings.Cut(p, string(filepath.Separator)) elem, rest, found := strings.Cut(p, string(filepath.Separator))
if strings.HasPrefix(elem, ".") || strings.HasPrefix(elem, "_") || elem == "testdata" { if strings.HasPrefix(elem, ".") || strings.HasPrefix(elem, "_") || elem == "testdata" {
continue loopPackages return
} }
if found && elem == "vendor" { if found && elem == "vendor" {
// Ignore this path if it contains the element "vendor" anywhere // Ignore this path if it contains the element "vendor" anywhere
// except for the last element (packages named vendor are allowed // except for the last element (packages named vendor are allowed
// for historical reasons). Note that found is true when this // for historical reasons). Note that found is true when this
// isn't the last path element. // isn't the last path element.
continue loopPackages return
} }
if !found { if !found {
// Didn't find the separator, so we're considering the last element. // Didn't find the separator, so we're considering the last element.
@ -241,12 +240,12 @@ loopPackages:
// Don't use GOROOT/src. // Don't use GOROOT/src.
if reldir == "" && importPathRoot == "" { if reldir == "" && importPathRoot == "" {
continue return
} }
name := path.Join(importPathRoot, filepath.ToSlash(reldir)) name := path.Join(importPathRoot, filepath.ToSlash(reldir))
if !treeCanMatch(name) { if !treeCanMatch(name) {
continue return
} }
if !have[name] { if !have[name] {
@ -257,7 +256,7 @@ loopPackages:
} }
} }
} }
} })
} }
// MatchInModule identifies the packages matching the given pattern within the // MatchInModule identifies the packages matching the given pattern within the