optimize startup sequence performance (#19009)

- bucket metadata does not need to look for legacy
  configuration files anymore if b.Created is non-zero

- stagger bucket metadata loads across nodes to
  avoid the current thundering-herd problem at startup.

- Remove deadlines for RenameData, RenameFile - these
  calls should not ever be timed out and should wait
  until completion or wait for client timeout. Do not
  choose timeouts for applications during the WRITE phase.

- increase R/W buffer size, increase maxMergeMessages to 30
This commit is contained in:
Harshavardhana 2024-02-08 11:21:21 -08:00 committed by GitHub
parent 7ec43bd177
commit 035a3ea4ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 37 additions and 39 deletions

View file

@ -22,6 +22,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"math/rand"
"sync"
"time"
@ -482,11 +483,11 @@ func (sys *BucketMetadataSys) concurrentLoad(ctx context.Context, buckets []Buck
for index := range buckets {
index := index
g.Go(func() error {
_, _ = sys.objAPI.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{
// Ensure heal opts for bucket metadata be deep healed all the time.
ScanMode: madmin.HealDeepScan,
Recreate: true,
})
// Sleep and stagger to avoid a blocked CPU and a thundering
// herd during the startup sequence.
time.Sleep(25*time.Millisecond + time.Duration(rand.Int63n(int64(100*time.Millisecond))))
_, _ = sys.objAPI.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{Recreate: true})
meta, err := loadBucketMetadata(ctx, sys.objAPI, buckets[index].Name)
if err != nil {
return err

View file

@ -182,26 +182,34 @@ func loadBucketMetadataParse(ctx context.Context, objectAPI ObjectLayer, bucket
b.defaultTimestamps()
}
configs, err := b.getAllLegacyConfigs(ctx, objectAPI)
if err != nil {
return b, err
// If bucket metadata is missing, look for legacy files,
// since we only ever had b.Created as non-zero when
// migration was complete in the 2020-May release. So this
// is a check to avoid migrating buckets that already
// have this field set.
if b.Created.IsZero() {
configs, err := b.getAllLegacyConfigs(ctx, objectAPI)
if err != nil {
return b, err
}
if len(configs) > 0 {
// Old bucket without bucket metadata. Hence we migrate existing settings.
if err = b.convertLegacyConfigs(ctx, objectAPI, configs); err != nil {
return b, err
}
}
}
if len(configs) == 0 {
if parse {
// nothing to update, parse and proceed.
err = b.parseAllConfigs(ctx, objectAPI)
if parse {
// nothing to update, parse and proceed.
if err = b.parseAllConfigs(ctx, objectAPI); err != nil {
return b, err
}
} else {
// Old bucket without bucket metadata. Hence we migrate existing settings.
err = b.convertLegacyConfigs(ctx, objectAPI, configs)
}
if err != nil {
return b, err
}
// migrate unencrypted remote targets
if err := b.migrateTargetConfig(ctx, objectAPI); err != nil {
if err = b.migrateTargetConfig(ctx, objectAPI); err != nil {
return b, err
}
@ -331,7 +339,7 @@ func (b *BucketMetadata) getAllLegacyConfigs(ctx context.Context, objectAPI Obje
for _, legacyFile := range legacyConfigs {
configFile := path.Join(bucketMetaPrefix, b.Name, legacyFile)
configData, err := readConfig(ctx, objectAPI, configFile)
configData, info, err := readConfigWithMetadata(ctx, objectAPI, configFile, ObjectOptions{})
if err != nil {
if _, ok := err.(ObjectExistsAsDirectory); ok {
// in FS mode it is possible that we have actual
@ -346,6 +354,7 @@ func (b *BucketMetadata) getAllLegacyConfigs(ctx context.Context, objectAPI Obje
return nil, err
}
configs[legacyFile] = configData
b.Created = info.ModTime
}
return configs, nil

View file

@ -1958,11 +1958,7 @@ func (z *erasureServerPools) HealFormat(ctx context.Context, dryRun bool) (madmi
}
func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
// Attempt heal on the bucket metadata, ignore any failures
hopts := opts
hopts.Recreate = false
defer z.HealObject(ctx, minioMetaBucket, pathJoin(bucketMetaPrefix, bucket, bucketMetadataFile), "", hopts)
// .metadata.bin healing is not needed here, it is automatically healed via read() call.
return z.s3Peer.HealBucket(ctx, bucket, opts)
}

View file

@ -313,7 +313,7 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
infop, err := storageDiskInfoHandler.Call(ctx, client.gridConn, &opts)
if err != nil {
return info, err
return info, toStorageErr(err)
}
info = *infop
if info.Error != "" {
@ -442,10 +442,6 @@ func (client *storageRESTClient) CheckParts(ctx context.Context, volume string,
// RenameData - rename source path to destination path atomically, metadata and data file.
func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
// Set a very long timeout for rename data.
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
resp, err := storageRenameDataHandler.Call(ctx, client.gridConn, &RenameDataHandlerParams{
DiskID: client.diskID,
SrcVolume: srcVolume,
@ -704,10 +700,6 @@ func (client *storageRESTClient) DeleteVersions(ctx context.Context, volume stri
// RenameFile - renames a file.
func (client *storageRESTClient) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
// Set a very long timeout for rename file
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
_, err = storageRenameFileHandler.Call(ctx, client.gridConn, &RenameFileHandlerParams{
DiskID: client.diskID,
SrcVolume: srcVolume,

View file

@ -175,8 +175,8 @@ func (c ContextDialer) DialContext(ctx context.Context, network, address string)
const (
defaultOutQueue = 10000
readBufferSize = 16 << 10
writeBufferSize = 16 << 10
readBufferSize = 32 << 10 // 32 KiB is the most optimal on Linux
writeBufferSize = 32 << 10 // 32 KiB is the most optimal on Linux
defaultDialTimeout = 2 * time.Second
connPingInterval = 10 * time.Second
connWriteTimeout = 3 * time.Second
@ -654,7 +654,7 @@ func (c *Connection) connect() {
fmt.Printf("%v Connecting to %v: %v. Retrying.\n", c.Local, toDial, err)
}
sleep := defaultDialTimeout + time.Duration(rng.Int63n(int64(defaultDialTimeout)))
next := dialStarted.Add(sleep)
next := dialStarted.Add(sleep / 2)
sleep = time.Until(next).Round(time.Millisecond)
if sleep < 0 {
sleep = 0
@ -950,7 +950,7 @@ func (c *Connection) handleMessages(ctx context.Context, conn net.Conn) {
cancel(ErrDisconnected)
return
}
if cap(msg) > readBufferSize*8 {
if cap(msg) > readBufferSize*4 {
// Don't keep too much memory around.
msg = nil
}

View file

@ -45,7 +45,7 @@ const (
maxBufferSize = 64 << 10
// If there is a queue, merge up to this many messages.
maxMergeMessages = 20
maxMergeMessages = 30
// clientPingInterval will ping the remote handler every 15 seconds.
// Clients disconnect when we exceed 2 intervals.