fix: revert allow offline disks on fresh start (#19052)

the PR in #16541 was incorrect and hand wrong assumptions
about the overall setup, revert this since this expectation
to have offline servers is wrong and we can end up with a
bigger chicken and egg problem.

This reverts commit 5996c8c4d5.

Bonus:

- preserve disk in globalLocalDrives properly upon connectDisks()
- do not return 'nil' from newXLStorage(), getting it ready for
  the next set of changes for 'format.json' loading.
This commit is contained in:
Harshavardhana 2024-02-14 10:37:34 -08:00 committed by GitHub
parent 134db72bb7
commit f961ec4aaf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 54 additions and 42 deletions

View file

@ -264,13 +264,22 @@ func (s *erasureSets) connectDisks() {
disk.SetDiskLoc(s.poolIndex, setIndex, diskIndex)
disk.SetFormatData(formatData)
s.erasureDisks[setIndex][diskIndex] = disk
s.erasureDisksMu.Unlock()
if disk.IsLocal() && globalIsDistErasure {
if disk.IsLocal() {
globalLocalDrivesMu.Lock()
globalLocalSetDrives[s.poolIndex][setIndex][diskIndex] = disk
if globalIsDistErasure {
globalLocalSetDrives[s.poolIndex][setIndex][diskIndex] = disk
}
for i, ldisk := range globalLocalDrives {
_, k, l := ldisk.GetDiskLoc()
if k == setIndex && l == diskIndex {
globalLocalDrives[i] = disk
break
}
}
globalLocalDrivesMu.Unlock()
}
s.erasureDisksMu.Unlock()
}(endpoint)
}

View file

@ -306,7 +306,12 @@ func countErrs(errs []error, err error) int {
return i
}
// Check if unformatted disks are equal to write quorum.
// Does all errors indicate we need to initialize all disks?.
func shouldInitErasureDisks(errs []error) bool {
return countErrs(errs, errUnformattedDisk) == len(errs)
}
// Check if unformatted disks are equal to 50%+1 of all the drives.
func quorumUnformattedDisks(errs []error) bool {
return countErrs(errs, errUnformattedDisk) >= (len(errs)/2)+1
}
@ -773,9 +778,6 @@ func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount,
hostCount := make(map[string]int, setDriveCount)
for j := 0; j < setDriveCount; j++ {
disk := storageDisks[i*setDriveCount+j]
if disk == nil {
continue
}
newFormat := format.Clone()
newFormat.Erasure.This = format.Erasure.Sets[i][j]
if deploymentID != "" {
@ -800,14 +802,8 @@ func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount,
logger.Info(" - Drive: %s", disk.String())
}
})
var warning string
if wantAtMost == 0 {
warning = fmt.Sprintf("Host %v has all drives of set. ", host)
} else {
warning = fmt.Sprintf("Host %v has more than %v drives of set. ", host, wantAtMost)
}
logger.Info(color.Yellow("WARNING: ") + warning +
"A host failure will result in data becoming unavailable.")
logger.Info(color.Yellow("WARNING:")+" Host %v has more than %v drives of set. "+
"A host failure will result in data becoming unavailable.", host, wantAtMost)
}
}
}

View file

@ -208,15 +208,8 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo
return nil, nil, err
}
// Return error when quorum unformatted disks - indicating we are
// waiting for first server to be online.
unformattedDisks := quorumUnformattedDisks(sErrs)
if unformattedDisks && !firstDisk {
return nil, nil, errNotFirstDisk
}
// All disks report unformatted we should initialized everyone.
if unformattedDisks && firstDisk {
if shouldInitErasureDisks(sErrs) && firstDisk {
logger.Info("Formatting %s pool, %v set(s), %v drives per set.",
humanize.Ordinal(poolCount), setCount, setDriveCount)
@ -232,6 +225,19 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo
return storageDisks, format, nil
}
// Return error when quorum unformatted disks - indicating we are
// waiting for first server to be online.
unformattedDisks := quorumUnformattedDisks(sErrs)
if unformattedDisks && !firstDisk {
return nil, nil, errNotFirstDisk
}
// Return error when quorum unformatted disks but waiting for rest
// of the servers to be online.
if unformattedDisks && firstDisk {
return nil, nil, errFirstDiskWait
}
format, err = getFormatErasureInQuorum(formatConfigs)
if err != nil {
logger.LogIf(GlobalContext, err)

View file

@ -229,14 +229,24 @@ func makeFormatErasureMetaVolumes(disk StorageAPI) error {
// Initialize a new storage disk.
func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
path := ep.Path
if path, err = getValidPath(path); err != nil {
return nil, err
s = &xlStorage{
drivePath: ep.Path,
endpoint: ep,
globalSync: globalFSOSync,
poolIndex: -1,
setIndex: -1,
diskIndex: -1,
}
info, err := disk.GetInfo(path, true)
s.drivePath, err = getValidPath(ep.Path)
if err != nil {
return nil, err
s.drivePath = ep.Path
return s, err
}
info, err := disk.GetInfo(s.drivePath, true)
if err != nil {
return s, err
}
if !globalIsCICD && !globalIsErasureSD {
@ -247,7 +257,7 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
// size less than or equal to the threshold as rootDrives.
rootDrive = info.Total <= globalRootDiskThreshold
} else {
rootDrive, err = disk.IsRootDisk(path, SlashSeparator)
rootDrive, err = disk.IsRootDisk(s.drivePath, SlashSeparator)
if err != nil {
return nil, err
}
@ -257,15 +267,6 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
}
}
s = &xlStorage{
drivePath: path,
endpoint: ep,
globalSync: globalFSOSync,
poolIndex: -1,
setIndex: -1,
diskIndex: -1,
}
// Sanitize before setting it
if info.NRRequests > 0 {
s.nrRequests = info.NRRequests
@ -285,11 +286,11 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
formatData, formatFi, err := formatErasureMigrate(s.drivePath)
if err != nil && !errors.Is(err, os.ErrNotExist) {
if os.IsPermission(err) {
return nil, errDiskAccessDenied
return s, errDiskAccessDenied
} else if isSysErrIO(err) {
return nil, errFaultyDisk
return s, errFaultyDisk
}
return nil, err
return s, err
}
s.formatData = formatData
s.formatFileInfo = formatFi
@ -297,7 +298,7 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
// Create all necessary bucket folders if possible.
if err = makeFormatErasureMetaVolumes(s); err != nil {
return nil, err
return s, err
}
if len(s.formatData) > 0 {