Add cluster scanner metrics in metrics-v3 (#19517)

endpoint: /minio/metrics/v3/cluster/scanner
metrics:
 - bucket_scans_finished (counter)
 - bucket_scans_started (counter)
 - directories_scanned (counter)
 - last_activity_seconds (gauge)
 - objects_scanned (counter)
 - versions_scanned (counter)
This commit is contained in:
Shireesh Anjal 2024-05-25 00:59:25 +05:30 committed by GitHub
parent 443c93c634
commit a591e06ae5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 99 additions and 0 deletions

66
cmd/metrics-v3-scanner.go Normal file
View file

@ -0,0 +1,66 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"context"
"time"
)
// Metric names exposed by the scanner metrics group. The names are listed
// alphabetically by their string value.
const (
	// Counters.
	scannerBucketScansFinished = "bucket_scans_finished"
	scannerBucketScansStarted  = "bucket_scans_started"
	scannerDirectoriesScanned  = "directories_scanned"

	// Gauge, reported in seconds.
	scannerLastActivitySeconds = "last_activity_seconds"

	// Counters.
	scannerObjectsScanned  = "objects_scanned"
	scannerVersionsScanned = "versions_scanned"
)
// Descriptors for the scanner metrics: each one pairs a metric name with its
// help text. All are counters except the last-activity metric, which is a
// gauge.
var (
	scannerBucketScansFinishedMD = NewCounterMD(
		scannerBucketScansFinished,
		"Total number of bucket scans finished since server start",
	)
	scannerBucketScansStartedMD = NewCounterMD(
		scannerBucketScansStarted,
		"Total number of bucket scans started since server start",
	)
	scannerDirectoriesScannedMD = NewCounterMD(
		scannerDirectoriesScanned,
		"Total number of directories scanned since server start",
	)
	scannerObjectsScannedMD = NewCounterMD(
		scannerObjectsScanned,
		"Total number of unique objects scanned since server start",
	)
	scannerVersionsScannedMD = NewCounterMD(
		scannerVersionsScanned,
		"Total number of object versions scanned since server start",
	)
	scannerLastActivitySecondsMD = NewGaugeMD(
		scannerLastActivitySeconds,
		"Time elapsed (in seconds) since last scan activity.",
	)
)
// loadClusterScannerMetrics - `MetricsLoaderFn` for cluster scanner metrics
// such as bucket scans, directories, objects and versions scanned.
//
// The counters are read from globalScannerMetrics. The last-activity gauge is
// derived from the LastUpdate field of the cached data usage info; if that
// cache lookup fails the error is logged and the gauge is left unset. The
// function always returns nil.
func loadClusterScannerMetrics(ctx context.Context, m MetricValues, c *metricsCache) error {
	finished := float64(globalScannerMetrics.lifetime(scannerMetricScanBucketDrive))
	m.Set(scannerBucketScansFinished, finished)

	// Started scans are reported as the finished count plus the number of
	// currently active drives.
	started := globalScannerMetrics.lifetime(scannerMetricScanBucketDrive)
	started += uint64(globalScannerMetrics.activeDrives())
	m.Set(scannerBucketScansStarted, float64(started))

	directories := float64(globalScannerMetrics.lifetime(scannerMetricScanFolder))
	m.Set(scannerDirectoriesScanned, directories)

	objects := float64(globalScannerMetrics.lifetime(scannerMetricScanObject))
	m.Set(scannerObjectsScanned, objects)

	versions := float64(globalScannerMetrics.lifetime(scannerMetricApplyVersion))
	m.Set(scannerVersionsScanned, versions)

	usage, err := c.dataUsageInfo.Get()
	if err != nil {
		// Best effort: report the failure but still serve the counters above.
		metricsLogIf(ctx, err)
		return nil
	}

	sinceLastUpdate := time.Since(usage.LastUpdate)
	m.Set(scannerLastActivitySeconds, sinceLastUpdate.Seconds())
	return nil
}

View file

@ -58,6 +58,7 @@ const (
loggerWebhookCollectorPath collectorPath = "/logger/webhook"
replicationCollectorPath collectorPath = "/replication"
notificationCollectorPath collectorPath = "/notification"
scannerCollectorPath collectorPath = "/scanner"
)
const (
@ -351,6 +352,18 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
loadClusterConfigMetrics,
)
scannerMG := NewMetricsGroup(scannerCollectorPath,
[]MetricDescriptor{
scannerBucketScansFinishedMD,
scannerBucketScansStartedMD,
scannerDirectoriesScannedMD,
scannerObjectsScannedMD,
scannerVersionsScannedMD,
scannerLastActivitySecondsMD,
},
loadClusterScannerMetrics,
)
loggerWebhookMG := NewMetricsGroup(loggerWebhookCollectorPath,
[]MetricDescriptor{
webhookFailedMessagesMD,
@ -389,6 +402,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
clusterReplicationMG,
clusterConfigMG,
scannerMG,
auditMG,
loggerWebhookMG,
}

View file

@ -58,6 +58,14 @@ These are metrics about the minio notification functionality
|----------|------------------------------------------------------|
| `/notification` | Metrics related to notification functionality |
### Scanner metrics
These are metrics about the MinIO scanner.
| Path | Description |
|------------|--------------------------------------|
| `/scanner` | Metrics related to the MinIO scanner |
### System metrics
These are metrics about the minio process and the node.
@ -358,3 +366,14 @@ The standard metrics group for GoCollector is not shown below.
| `minio_notification_events_errors_total` | `counter` | Events that were failed to be sent to the targets | `server` |
| `minio_notification_events_sent_total` | `counter` | Total number of events sent to the targets | `server` |
| `minio_notification_events_skipped_total` | `counter` | Events that were skipped to be sent to the targets due to the in-memory queue being full | `server` |
### `/scanner`
| Name | Type | Help | Labels |
|--------------------------------------------|-----------|------------------------------------------------------------|----------|
| `minio_scanner_bucket_scans_finished` | `counter` | Total number of bucket scans finished since server start | `server` |
| `minio_scanner_bucket_scans_started` | `counter` | Total number of bucket scans started since server start | `server` |
| `minio_scanner_directories_scanned` | `counter` | Total number of directories scanned since server start | `server` |
| `minio_scanner_last_activity_seconds` | `gauge` | Time elapsed (in seconds) since last scan activity | `server` |
| `minio_scanner_objects_scanned` | `counter` | Total number of unique objects scanned since server start | `server` |
| `minio_scanner_versions_scanned` | `counter` | Total number of object versions scanned since server start | `server` |