Reduce allocations in Walkdir (#17036)

This commit is contained in:
Klaus Post 2023-04-15 10:25:25 -07:00 committed by GitHub
parent dd9ed85e22
commit 839b9c9271
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 174 additions and 10 deletions

View file

@ -18,6 +18,7 @@
package cmd
import (
"bytes"
"context"
"fmt"
"io"
@ -126,6 +127,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
scanDir = func(current string) error {
// Skip forward, if requested...
var sb bytes.Buffer
forward := ""
if len(opts.ForwardTo) > 0 && strings.HasPrefix(opts.ForwardTo, current) {
forward = strings.TrimPrefix(opts.ForwardTo, current)
@ -160,6 +162,9 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
return nil
}
dirObjects := make(map[string]struct{})
// Avoid a bunch of cleanup when joining.
current = strings.Trim(current, SlashSeparator)
for i, entry := range entries {
if opts.Limit > 0 && objsReturned >= opts.Limit {
return nil
@ -176,7 +181,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
entries[i] = ""
continue
}
if strings.HasSuffix(entry, slashSeparator) {
if hasSuffixByte(entry, SlashSeparatorChar) {
if strings.HasSuffix(entry, globalDirSuffixWithSlash) {
// Add without extension so it is sorted correctly.
entry = strings.TrimSuffix(entry, globalDirSuffixWithSlash) + slashSeparator
@ -198,7 +203,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
if HasSuffix(entry, xlStorageFormatFile) {
var meta metaCacheEntry
s.walkReadMu.Lock()
meta.metadata, err = s.readMetadata(ctx, pathJoin(volumeDir, current, entry))
meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, current, entry))
s.walkReadMu.Unlock()
diskHealthCheckOK(ctx, err)
if err != nil {
@ -212,7 +217,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
}
meta.name = strings.TrimSuffix(entry, xlStorageFormatFile)
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
meta.name = pathJoin(current, meta.name)
meta.name = pathJoinBuf(&sb, current, meta.name)
meta.name = decodeDirObject(meta.name)
objReturned(meta.metadata)
@ -223,7 +228,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
if HasSuffix(entry, xlStorageFormatFileV1) {
var meta metaCacheEntry
s.walkReadMu.Lock()
meta.metadata, err = xioutil.ReadFile(pathJoin(volumeDir, current, entry))
meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, current, entry))
s.walkReadMu.Unlock()
diskHealthCheckOK(ctx, err)
if err != nil {
@ -234,7 +239,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
}
meta.name = strings.TrimSuffix(entry, xlStorageFormatFileV1)
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
meta.name = pathJoin(current, meta.name)
meta.name = pathJoinBuf(&sb, current, meta.name)
objReturned(meta.metadata)
out <- meta
@ -267,7 +272,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
if contextCanceled(ctx) {
return ctx.Err()
}
meta := metaCacheEntry{name: pathJoin(current, entry)}
meta := metaCacheEntry{name: pathJoinBuf(&sb, current, entry)}
// If directory entry on stack before this, pop it now.
for len(dirStack) > 0 && dirStack[len(dirStack)-1] < meta.name {
@ -291,7 +296,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
}
s.walkReadMu.Lock()
meta.metadata, err = s.readMetadata(ctx, pathJoin(volumeDir, meta.name, xlStorageFormatFile))
meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFile))
s.walkReadMu.Unlock()
diskHealthCheckOK(ctx, err)
switch {
@ -304,7 +309,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
out <- meta
case osIsNotExist(err), isSysErrIsDir(err):
meta.metadata, err = xioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1))
meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFileV1))
diskHealthCheckOK(ctx, err)
if err == nil {
// It was an object
@ -317,7 +322,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
// NOT an object, append to stack (with slash)
// If dirObject, but no metadata (which is unexpected) we skip it.
if !isDirObj {
if !isDirEmpty(pathJoin(volumeDir, meta.name+slashSeparator)) {
if !isDirEmpty(pathJoinBuf(&sb, volumeDir, meta.name)) {
dirStack = append(dirStack, meta.name+slashSeparator)
}
}

View file

@ -209,6 +209,9 @@ func checkObjectNameForLengthAndSlash(bucket, object string) error {
// SlashSeparator - slash separator.
const SlashSeparator = "/"
// SlashSeparatorChar - slash separator.
const SlashSeparatorChar = '/'
// retainSlash - retains slash from a path.
func retainSlash(s string) string {
if s == "" {
@ -231,13 +234,102 @@ func pathsJoinPrefix(prefix string, elem ...string) (paths []string) {
func pathJoin(elem ...string) string {
trailingSlash := ""
if len(elem) > 0 {
if HasSuffix(elem[len(elem)-1], SlashSeparator) {
if hasSuffixByte(elem[len(elem)-1], SlashSeparatorChar) {
trailingSlash = SlashSeparator
}
}
return path.Join(elem...) + trailingSlash
}
// pathJoinBuf - like path.Join() but retains trailing SlashSeparator of the last element.
// Provide a string builder to reduce allocation.
func pathJoinBuf(dst *bytes.Buffer, elem ...string) string {
trailingSlash := len(elem) > 0 && hasSuffixByte(elem[len(elem)-1], SlashSeparatorChar)
dst.Reset()
added := 0
for _, e := range elem {
if added > 0 || e != "" {
if added > 0 {
dst.WriteRune(SlashSeparatorChar)
}
dst.WriteString(e)
added += len(e)
}
}
if pathNeedsClean(dst.Bytes()) {
s := path.Clean(dst.String())
if trailingSlash {
return s + SlashSeparator
}
return s
}
if trailingSlash {
dst.WriteRune(SlashSeparatorChar)
}
return dst.String()
}
// hasSuffixByte returns true if the last byte of s is 'suffix'
func hasSuffixByte(s string, suffix byte) bool {
return len(s) > 0 && s[len(s)-1] == suffix
}
// pathNeedsClean returns whether path.Clean may change the path.
// Will detect all cases that will be cleaned,
// but may produce false positives on non-trivial paths.
func pathNeedsClean(path []byte) bool {
if len(path) == 0 {
return true
}
rooted := path[0] == '/'
n := len(path)
r, w := 0, 0
if rooted {
r, w = 1, 1
}
for r < n {
switch {
case path[r] > 127:
// Non ascii.
return true
case path[r] == '/':
// multiple / elements
return true
case path[r] == '.' && (r+1 == n || path[r+1] == '/'):
// . element - assume it has to be cleaned.
return true
case path[r] == '.' && path[r+1] == '.' && (r+2 == n || path[r+2] == '/'):
// .. element: remove to last / - assume it has to be cleaned.
return true
default:
// real path element.
// add slash if needed
if rooted && w != 1 || !rooted && w != 0 {
w++
}
// copy element
for ; r < n && path[r] != '/'; r++ {
w++
}
// allow one slash, not at end
if r < n-1 && path[r] == '/' {
r++
}
}
}
// Turn empty string into "."
if w == 0 {
return true
}
return false
}
// mustGetUUID - get a random UUID.
func mustGetUUID() string {
u, err := uuid.NewRandom()

View file

@ -740,3 +740,70 @@ func TestS2CompressReader(t *testing.T) {
})
}
}
func Test_pathNeedsClean(t *testing.T) {
type pathTest struct {
path, result string
}
cleantests := []pathTest{
// Already clean
{"", "."},
{"abc", "abc"},
{"abc/def", "abc/def"},
{"a/b/c", "a/b/c"},
{".", "."},
{"..", ".."},
{"../..", "../.."},
{"../../abc", "../../abc"},
{"/abc", "/abc"},
{"/abc/def", "/abc/def"},
{"/", "/"},
// Remove trailing slash
{"abc/", "abc"},
{"abc/def/", "abc/def"},
{"a/b/c/", "a/b/c"},
{"./", "."},
{"../", ".."},
{"../../", "../.."},
{"/abc/", "/abc"},
// Remove doubled slash
{"abc//def//ghi", "abc/def/ghi"},
{"//abc", "/abc"},
{"///abc", "/abc"},
{"//abc//", "/abc"},
{"abc//", "abc"},
// Remove . elements
{"abc/./def", "abc/def"},
{"/./abc/def", "/abc/def"},
{"abc/.", "abc"},
// Remove .. elements
{"abc/def/ghi/../jkl", "abc/def/jkl"},
{"abc/def/../ghi/../jkl", "abc/jkl"},
{"abc/def/..", "abc"},
{"abc/def/../..", "."},
{"/abc/def/../..", "/"},
{"abc/def/../../..", ".."},
{"/abc/def/../../..", "/"},
{"abc/def/../../../ghi/jkl/../../../mno", "../../mno"},
// Combinations
{"abc/./../def", "def"},
{"abc//./../def", "def"},
{"abc/../../././../def", "../../def"},
}
for _, test := range cleantests {
want := test.path != test.result
got := pathNeedsClean([]byte(test.path))
if !got {
t.Logf("no clean: %q", test.path)
}
if want && !got {
t.Errorf("input: %q, want %v, got %v", test.path, want, got)
}
}
}