cmd/internal/obj: refactor code to separate content-addressable symbols by section

The goal of this change is to improve the documentation and make it easier to keep Link.NumberSyms and writer.contentHash aligned. No functional changes. A subsequent change will add conditions to contentHashSection. Change-Id: I0a274f6974459d34d5a8553081f33ea4cd87f248 Reviewed-on: https://go-review.googlesource.com/c/go/+/352669 Trust: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
2024-11-02 11:50:30 +00:00 · 2021-09-27 13:39:43 -07:00 · 2021-09-27 13:39:43 -07:00 · 534dfb2aeb
commit 534dfb2aeb
parent 850a4ffb63
3 changed files with 27 additions and 11 deletions
--- a/src/cmd/internal/obj/objfile.go
+++ b/src/cmd/internal/obj/objfile.go
@ -381,7 +381,26 @@ func (w *writer) Hash(s *LSym) {
 	w.Bytes(b[:])
 }

+// contentHashSection returns a mnemonic for s's section.
+// The goal is to prevent content-addressability from moving symbols between sections.
+// contentHashSection only distinguishes between sets of sections for which this matters.
+// Allowing flexibility increases the effectiveness of content-addressibility.
+// But in some cases, such as doing addressing based on a base symbol,
+// we need to ensure that a symbol is always in a prticular section.
+// Some of these conditions are duplicated in cmd/link/internal/ld.(*Link).symtab.
+// TODO: instead of duplicating them, have the compiler decide where symbols go.
+func contentHashSection(s *LSym) byte {
+	name := s.Name
+	if strings.HasPrefix(name, "type.") {
+		return 'T'
+	}
+	return 0
+}
+
 func contentHash64(s *LSym) goobj.Hash64Type {
+	if contentHashSection(s) != 0 {
+		panic("short hash of non-default-section sym " + s.Name)
+	}
 	var b goobj.Hash64Type
 	copy(b[:], s.P)
 	return b
@ -416,15 +435,10 @@ func (w *writer) contentHash(s *LSym) goobj.HashType {
 	// In this case, if the smaller symbol is alive, the larger is not kept unless
 	// needed.
 	binary.LittleEndian.PutUint64(tmp[:8], uint64(s.Size))
-	h.Write(tmp[:8])
+	// Some symbols require being in separate sections.
+	tmp[8] = contentHashSection(s)
+	h.Write(tmp[:9])

-	// Don't dedup type symbols with others, as they are in a different
-	// section.
-	if strings.HasPrefix(s.Name, "type.") {
-		h.Write([]byte{'T'})
-	} else {
-		h.Write([]byte{0})
-	}
 	// The compiler trims trailing zeros _sometimes_. We just do
 	// it always.
 	h.Write(bytes.TrimRight(s.P, "\x00"))
--- a/src/cmd/internal/obj/sym.go
+++ b/src/cmd/internal/obj/sym.go
@ -39,7 +39,6 @@ import (
 	"log"
 	"math"
 	"sort"
-	"strings"
 )

 func Linknew(arch *LinkArch) *Link {
@ -206,9 +205,10 @@ func (ctxt *Link) NumberSyms() {
 		// if Pkgpath is unknown, cannot hash symbols with relocations, as it
 		// may reference named symbols whose names are not fully expanded.
 		if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) {
-			if s.Size <= 8 && len(s.R) == 0 && !strings.HasPrefix(s.Name, "type.") {
+			if s.Size <= 8 && len(s.R) == 0 && contentHashSection(s) == 0 {
 				// We can use short hash only for symbols without relocations.
-				// Don't use short hash for type symbols, as they need special handling.
+				// Don't use short hash for symbols that belong in a particular section
+				// or require special handling (such as type symbols).
 				s.PkgIdx = goobj.PkgIdxHashed64
 				s.SymIdx = hashed64idx
 				if hashed64idx != int32(len(ctxt.hashed64defs)) {
--- a/src/cmd/link/internal/ld/symtab.go
+++ b/src/cmd/link/internal/ld/symtab.go
@ -525,6 +525,8 @@ func (ctxt *Link) symtab(pcln *pclntab) []sym.SymKind {
 	// within a type they sort by size, so the .* symbols
 	// just defined above will be first.
 	// hide the specific symbols.
+	// Some of these symbol section conditions are duplicated
+	// in cmd/internal/obj.contentHashSection.
 	nsym := loader.Sym(ldr.NSym())
 	symGroupType := make([]sym.SymKind, nsym)
 	for s := loader.Sym(1); s < nsym; s++ {