fix: calling statfs() moves the disk head (#18203)

If an erasure parity upgrade is needed, rely on the
in-memory values instead of performing a DiskInfo() call.

https://brendangregg.com/blog/2016-09-03/sudden-disk-busy.html

For HDDs these calls are problematic, so let's
avoid them: there is no value in being absolutely
strict about parity here. We are okay with
increasing parity based on the in-memory
online/offline drive ratio.
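
The resulting pattern is simple enough to capture in isolation: count nil/offline drives via the in-memory IsOnline() state, bump parity one step per offline drive, refuse the write when half or more of the drives are gone, and cap parity at half the drive count. Below is a minimal, self-contained Go sketch of that logic; the StorageAPI interface is trimmed to the one method used here, and the upgradeParity/fakeDisk names are illustrative, not MinIO's actual API.

package main

import (
	"errors"
	"fmt"
)

// StorageAPI is reduced here to the single in-memory check the new
// code relies on; MinIO's real interface is much larger.
type StorageAPI interface {
	IsOnline() bool
}

var errErasureWriteQuorum = errors.New("erasure write quorum not met")

// upgradeParity mirrors the pattern in the diff below: each nil or
// offline drive increases parity by one, and no DiskInfo()/statfs()
// call is ever issued.
func upgradeParity(disks []StorageAPI, parityDrives int) (int, error) {
	var offlineDrives int
	for _, disk := range disks {
		if disk == nil || !disk.IsOnline() {
			parityDrives++
			offlineDrives++
		}
	}
	if offlineDrives >= (len(disks)+1)/2 {
		// offline drives are 50% or more of all drives: no write quorum.
		return 0, errErasureWriteQuorum
	}
	if parityDrives >= len(disks)/2 {
		// never let parity exceed half the drive count.
		parityDrives = len(disks) / 2
	}
	return parityDrives, nil
}

type fakeDisk struct{ online bool }

func (d fakeDisk) IsOnline() bool { return d.online }

func main() {
	disks := make([]StorageAPI, 16)
	for i := range disks {
		disks[i] = fakeDisk{online: i >= 3} // first 3 drives are offline
	}
	parity, err := upgradeParity(disks, 4)
	fmt.Println(parity, err) // 7 <nil>: parity upgraded from 4 to 7
}

With 16 drives and parity 4, three offline drives upgrade parity to 7 (capped at 8 if more went offline); eight or more offline drives fail fast with errErasureWriteQuorum instead of writing without quorum.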
Harshavardhana committed 2023-10-10 13:47:35 -07:00 (via GitHub)
parent 9ab1f25a47 · commit 77e94087cf
2 changed files with 21 additions and 49 deletions

cmd/erasure-multipart.go

@@ -40,7 +40,6 @@ import (
 	"github.com/minio/minio/internal/logger"
 	"github.com/minio/pkg/v2/mimedb"
 	"github.com/minio/pkg/v2/sync/errgroup"
-	uatomic "go.uber.org/atomic"
 )

 func (er erasureObjects) getUploadIDDir(bucket, object, uploadID string) string {
@@ -273,9 +272,15 @@ func (er erasureObjects) ListMultipartUploads(ctx context.Context, bucket, objec
 	if len(disks) == 0 {
 		// using er.getLoadBalancedLocalDisks() has one side-affect where
 		// on a pooled setup all disks are remote, add a fallback
-		disks = er.getOnlineDisks()
+		disks = er.getDisks()
 	}
 	for _, disk = range disks {
+		if disk == nil {
+			continue
+		}
+		if !disk.IsOnline() {
+			continue
+		}
 		uploadIDs, err = disk.ListDir(ctx, minioMetaMultipartBucket, er.getMultipartSHADir(bucket, object), -1)
 		if err != nil {
 			if errors.Is(err, errDiskNotFound) {
@@ -399,47 +404,31 @@ func (er erasureObjects) newMultipartUpload(ctx context.Context, bucket string,
 	// If we have offline disks upgrade the number of erasure codes for this object.
 	parityOrig := parityDrives

-	atomicParityDrives := uatomic.NewInt64(0)
-	atomicOfflineDrives := uatomic.NewInt64(0)
-
-	// Start with current parityDrives
-	atomicParityDrives.Store(int64(parityDrives))
-
-	var wg sync.WaitGroup
+	var offlineDrives int
 	for _, disk := range onlineDisks {
 		if disk == nil {
-			atomicParityDrives.Inc()
-			atomicOfflineDrives.Inc()
+			parityDrives++
+			offlineDrives++
 			continue
 		}
 		if !disk.IsOnline() {
-			atomicParityDrives.Inc()
-			atomicOfflineDrives.Inc()
+			parityDrives++
+			offlineDrives++
 			continue
 		}
-		wg.Add(1)
-		go func(disk StorageAPI) {
-			defer wg.Done()
-			di, err := disk.DiskInfo(ctx, false)
-			if err != nil || di.ID == "" {
-				atomicOfflineDrives.Inc()
-				atomicParityDrives.Inc()
-			}
-		}(disk)
 	}
-	wg.Wait()

-	if int(atomicOfflineDrives.Load()) >= (len(onlineDisks)+1)/2 {
+	if offlineDrives >= (len(onlineDisks)+1)/2 {
 		// if offline drives are more than 50% of the drives
 		// we have no quorum, we shouldn't proceed just
 		// fail at that point.
 		return nil, toObjectErr(errErasureWriteQuorum, bucket, object)
 	}

-	parityDrives = int(atomicParityDrives.Load())
 	if parityDrives >= len(onlineDisks)/2 {
 		parityDrives = len(onlineDisks) / 2
 	}
 	if parityOrig != parityDrives {
 		userDefined[minIOErasureUpgraded] = strconv.Itoa(parityOrig) + "->" + strconv.Itoa(parityDrives)
 	}

cmd/erasure-object.go

@@ -47,7 +47,6 @@ import (
 	"github.com/minio/pkg/v2/sync/errgroup"
 	"github.com/minio/pkg/v2/wildcard"
 	"github.com/tinylib/msgp/msgp"
-	uatomic "go.uber.org/atomic"
 )

 // list all errors which can be ignored in object operations.
@@ -1097,47 +1096,31 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
 	// If we have offline disks upgrade the number of erasure codes for this object.
 	parityOrig := parityDrives

-	atomicParityDrives := uatomic.NewInt64(0)
-	atomicOfflineDrives := uatomic.NewInt64(0)
-
-	// Start with current parityDrives
-	atomicParityDrives.Store(int64(parityDrives))
-
-	var wg sync.WaitGroup
+	var offlineDrives int
 	for _, disk := range storageDisks {
 		if disk == nil {
-			atomicParityDrives.Inc()
-			atomicOfflineDrives.Inc()
+			parityDrives++
+			offlineDrives++
 			continue
 		}
 		if !disk.IsOnline() {
-			atomicParityDrives.Inc()
-			atomicOfflineDrives.Inc()
+			parityDrives++
+			offlineDrives++
 			continue
 		}
-		wg.Add(1)
-		go func(disk StorageAPI) {
-			defer wg.Done()
-			di, err := disk.DiskInfo(ctx, false)
-			if err != nil || di.ID == "" {
-				atomicOfflineDrives.Inc()
-				atomicParityDrives.Inc()
-			}
-		}(disk)
 	}
-	wg.Wait()

-	if int(atomicOfflineDrives.Load()) >= (len(storageDisks)+1)/2 {
+	if offlineDrives >= (len(storageDisks)+1)/2 {
 		// if offline drives are more than 50% of the drives
 		// we have no quorum, we shouldn't proceed just
 		// fail at that point.
 		return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object)
 	}

-	parityDrives = int(atomicParityDrives.Load())
 	if parityDrives >= len(storageDisks)/2 {
 		parityDrives = len(storageDisks) / 2
 	}
 	if parityOrig != parityDrives {
 		userDefined[minIOErasureUpgraded] = strconv.Itoa(parityOrig) + "->" + strconv.Itoa(parityDrives)
 	}