allow changing endpoints in distributed setups (#16071)

This commit is contained in:
Harshavardhana 2022-11-16 07:59:10 -08:00 committed by GitHub
parent 3597af789e
commit 853c4de75a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 25 deletions

View file

@ -1,4 +1,4 @@
// Copyright (c) 2015-2021 MinIO, Inc.
// Copyright (c) 2015-2022 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
@ -64,6 +64,7 @@ func (s1 ServerSystemConfig) Diff(s2 ServerSystemConfig) error {
return fmt.Errorf("Expected platform '%s', found to be running '%s'",
s1.MinioPlatform, s2.MinioPlatform)
}
if s1.MinioEndpoints.NEndpoints() != s2.MinioEndpoints.NEndpoints() {
return fmt.Errorf("Expected number of endpoints %d, seen %d", s1.MinioEndpoints.NEndpoints(),
s2.MinioEndpoints.NEndpoints())
@ -200,15 +201,18 @@ func verifyServerSystemConfig(ctx context.Context, endpointServerPools EndpointS
srcCfg := getServerSystemCfg()
clnts := newBootstrapRESTClients(endpointServerPools)
var onlineServers int
var offlineEndpoints []string
var offlineEndpoints []error
var incorrectConfigs []error
var retries int
for onlineServers < len(clnts)/2 {
for _, clnt := range clnts {
if err := clnt.Verify(ctx, srcCfg); err != nil {
if !isNetworkError(err) {
logger.LogIf(ctx, fmt.Errorf("%s has incorrect configuration: %w", clnt.String(), err))
logger.LogOnceIf(ctx, fmt.Errorf("%s has incorrect configuration: %w", clnt.String(), err), clnt.String())
incorrectConfigs = append(incorrectConfigs, fmt.Errorf("%s has incorrect configuration: %w", clnt.String(), err))
} else {
offlineEndpoints = append(offlineEndpoints, fmt.Errorf("%s is unreachable: %w", clnt.String(), err))
}
offlineEndpoints = append(offlineEndpoints, clnt.String())
continue
}
onlineServers++
@ -221,15 +225,19 @@ func verifyServerSystemConfig(ctx context.Context, endpointServerPools EndpointS
// 100% CPU when half the endpoints are offline.
time.Sleep(100 * time.Millisecond)
retries++
// after 5 retries start logging that servers are not reachable yet
if retries >= 5 {
logger.Info(fmt.Sprintf("Waiting for atleast %d remote servers to be online for bootstrap check", len(clnts)/2))
// after 20 retries start logging that servers are not reachable yet
if retries >= 20 {
logger.Info(fmt.Sprintf("Waiting for atleast %d remote servers with valid configuration to be online", len(clnts)/2))
if len(offlineEndpoints) > 0 {
logger.Info(fmt.Sprintf("Following servers are currently offline or unreachable %s", offlineEndpoints))
}
if len(incorrectConfigs) > 0 {
logger.Info(fmt.Sprintf("Following servers mismatch in their configuration %s", incorrectConfigs))
}
retries = 0 // reset to log again after 20 retries.
}
offlineEndpoints = nil
incorrectConfigs = nil
}
}
return nil

View file

@ -317,7 +317,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) {
}
replaceScheme := func(k string) string {
// This is needed as fallback when users are changeing
// This is needed as fallback when users are updating
// from http->https or https->http, we need to verify
// both because MinIO remembers the command-line in
// "exact" order - as long as this order is not disturbed
@ -359,11 +359,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) {
}
}
if !ok {
if globalIsErasureSD {
update = true
} else {
return false, fmt.Errorf("pool(%s) = %s is not specified, please specify on server command line", humanize.Ordinal(pi.position+1), k)
}
update = true
}
}
@ -378,11 +374,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) {
}
}
if !ok {
if globalIsErasureSD {
update = true
} else {
return false, fmt.Errorf("pool(%s) = %s is not specified, please specify on server command line", humanize.Ordinal(pi.position+1), k)
}
update = true
}
if ok && pos != pi.position {
return false, fmt.Errorf("pool order change detected for %s, expected position is (%s) but found (%s)", k, humanize.Ordinal(pi.position+1), humanize.Ordinal(pos+1))
@ -400,6 +392,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) {
}
}
}
return update, nil
}

View file

@ -112,16 +112,16 @@ func TestPoolMetaValidate(t *testing.T) {
{
meta: meta,
pools: newPools,
name: "Invalid-Commandline",
expectedErr: true,
expectedUpdate: false,
name: "Correct-Update",
expectedErr: false,
expectedUpdate: true,
},
{
meta: meta,
pools: reducedPools,
name: "Invalid-Reduced",
expectedErr: true,
expectedUpdate: false,
name: "Correct-Update",
expectedErr: false,
expectedUpdate: true,
},
{
meta: meta,

View file

@ -568,7 +568,7 @@ func serverMain(ctx *cli.Context) {
setHTTPServer(httpServer)
if globalIsDistErasure && globalEndpoints.FirstLocal() {
if globalIsDistErasure {
// Additionally in distributed setup, validate the setup and configuration.
if err := verifyServerSystemConfig(GlobalContext, globalEndpoints); err != nil {
logger.Fatal(err, "Unable to start the server")