From 5e7b243bde3e5bdaaec0bf886fa9321c3eb59ea4 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 26 Jun 2024 00:44:34 -0700 Subject: [PATCH] extend cluster health to return errors for IAM, and Bucket metadata (#19995) Bonus: make API freeze to be opt-in instead of default --- cmd/bootstrap-peer-server.go | 19 +++++++++++-------- cmd/common-main.go | 2 +- cmd/globals.go | 4 ++-- cmd/healthcheck-handler.go | 33 ++++++++++++++++++++++++++------- cmd/server-main.go | 7 ++++--- 5 files changed, 44 insertions(+), 21 deletions(-) diff --git a/cmd/bootstrap-peer-server.go b/cmd/bootstrap-peer-server.go index ebb60a919..1c3f60d08 100644 --- a/cmd/bootstrap-peer-server.go +++ b/cmd/bootstrap-peer-server.go @@ -106,14 +106,17 @@ func (s1 *ServerSystemConfig) Diff(s2 *ServerSystemConfig) error { } var skipEnvs = map[string]struct{}{ - "MINIO_OPTS": {}, - "MINIO_CERT_PASSWD": {}, - "MINIO_SERVER_DEBUG": {}, - "MINIO_DSYNC_TRACE": {}, - "MINIO_ROOT_USER": {}, - "MINIO_ROOT_PASSWORD": {}, - "MINIO_ACCESS_KEY": {}, - "MINIO_SECRET_KEY": {}, + "MINIO_OPTS": {}, + "MINIO_CERT_PASSWD": {}, + "MINIO_SERVER_DEBUG": {}, + "MINIO_DSYNC_TRACE": {}, + "MINIO_ROOT_USER": {}, + "MINIO_ROOT_PASSWORD": {}, + "MINIO_ACCESS_KEY": {}, + "MINIO_SECRET_KEY": {}, + "MINIO_OPERATOR_VERSION": {}, + "MINIO_VSPHERE_PLUGIN_VERSION": {}, + "MINIO_CI_CD": {}, } func getServerSystemCfg() *ServerSystemConfig { diff --git a/cmd/common-main.go b/cmd/common-main.go index b739f7ef5..cfb097eb6 100644 --- a/cmd/common-main.go +++ b/cmd/common-main.go @@ -834,7 +834,7 @@ func serverHandleEnvVars() { } } - globalDisableFreezeOnBoot = env.Get("_MINIO_DISABLE_API_FREEZE_ON_BOOT", "") == "true" || serverDebugLog + globalEnableSyncBoot = env.Get("MINIO_SYNC_BOOT", config.EnableOff) == config.EnableOn } func loadRootCredentials() { diff --git a/cmd/globals.go b/cmd/globals.go index 0405c7770..7491ac8e5 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -449,8 +449,8 @@ var ( // dynamic sleeper for multipart expiration routine deleteMultipartCleanupSleeper = newDynamicSleeper(5, 25*time.Millisecond, false) - // Is _MINIO_DISABLE_API_FREEZE_ON_BOOT set? - globalDisableFreezeOnBoot bool + // Is MINIO_SYNC_BOOT set? + globalEnableSyncBoot bool // Contains NIC interface name used for internode communication globalInternodeInterface string diff --git a/cmd/healthcheck-handler.go b/cmd/healthcheck-handler.go index 48b14e2ca..12368d1da 100644 --- a/cmd/healthcheck-handler.go +++ b/cmd/healthcheck-handler.go @@ -29,14 +29,35 @@ import ( const unavailable = "offline" -// ClusterCheckHandler returns if the server is ready for requests. -func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) { - ctx := newContext(r, w, "ClusterCheckHandler") - +func checkHealth(w http.ResponseWriter) ObjectLayer { objLayer := newObjectLayerFn() if objLayer == nil { w.Header().Set(xhttp.MinIOServerStatus, unavailable) writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) + return nil + } + + if !globalBucketMetadataSys.Initialized() { + w.Header().Set(xhttp.MinIOServerStatus, "bucket-metadata-offline") + writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) + return nil + } + + if !globalIAMSys.Initialized() { + w.Header().Set(xhttp.MinIOServerStatus, "iam-offline") + writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) + return nil + } + + return objLayer +} + +// ClusterCheckHandler returns if the server is ready for requests. +func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) { + ctx := newContext(r, w, "ClusterCheckHandler") + + objLayer := checkHealth(w) + if objLayer == nil { return } @@ -72,10 +93,8 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) { func ClusterReadCheckHandler(w http.ResponseWriter, r *http.Request) { ctx := newContext(r, w, "ClusterReadCheckHandler") - objLayer := newObjectLayerFn() + objLayer := checkHealth(w) if objLayer == nil { - w.Header().Set(xhttp.MinIOServerStatus, unavailable) - writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) return } diff --git a/cmd/server-main.go b/cmd/server-main.go index f72807e2f..52be5843f 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -897,7 +897,7 @@ func serverMain(ctx *cli.Context) { }) } - if !globalDisableFreezeOnBoot { + if globalEnableSyncBoot { // Freeze the services until the bucket notification subsystem gets initialized. bootstrapTrace("freezeServices", freezeServices) } @@ -1000,10 +1000,11 @@ func serverMain(ctx *cli.Context) { }() go func() { - if !globalDisableFreezeOnBoot { + if globalEnableSyncBoot { defer bootstrapTrace("unfreezeServices", unfreezeServices) t := time.AfterFunc(5*time.Minute, func() { - warnings = append(warnings, color.YellowBold("- Initializing the config subsystem is taking longer than 5 minutes. Please set '_MINIO_DISABLE_API_FREEZE_ON_BOOT=true' to not freeze the APIs")) + warnings = append(warnings, + color.YellowBold("- Initializing the config subsystem is taking longer than 5 minutes. Please remove 'MINIO_SYNC_BOOT=on' to not freeze the APIs")) }) defer t.Stop() }