2024-04-25 18:43:50 +00:00

382 lines
16 KiB

/*
 * Teleport
 * Copyright (C) 2023  Gravitational, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package teleport
// Metric names and metric tag names. Each constant's comment states what the
// metric measures or what the tag identifies.
const (
	// MetricGenerateRequests counts how many generate server keys requests
	// are issued over time.
	MetricGenerateRequests = "auth_generate_requests_total"

	// MetricGenerateRequestsThrottled measures how many generate requests
	// are throttled.
	MetricGenerateRequestsThrottled = "auth_generate_requests_throttled_total"

	// MetricGenerateRequestsCurrent measures current in-flight requests.
	MetricGenerateRequestsCurrent = "auth_generate_requests"

	// MetricGenerateRequestsHistogram measures generate requests latency.
	MetricGenerateRequestsHistogram = "auth_generate_seconds"

	// MetricServerInteractiveSessions measures interactive sessions in flight.
	MetricServerInteractiveSessions = "server_interactive_sessions_total"

	// MetricProxySSHSessions measures sessions in flight on the proxy.
	MetricProxySSHSessions = "proxy_ssh_sessions_total"

	// MetricRemoteClusters measures connected remote clusters.
	MetricRemoteClusters = "remote_clusters"

	// MetricTrustedClusters counts trusted clusters.
	MetricTrustedClusters = "trusted_clusters"

	// MetricClusterNameNotFound counts times a cluster name was not found.
	MetricClusterNameNotFound = "cluster_name_not_found_total"

	// MetricFailedLoginAttempts counts failed login attempts.
	MetricFailedLoginAttempts = "failed_login_attempts_total"

	// MetricConnectToNodeAttempts counts ssh attempts.
	MetricConnectToNodeAttempts = "connect_to_node_attempts_total"

	// MetricFailedConnectToNodeAttempts counts failed ssh attempts.
	MetricFailedConnectToNodeAttempts = "failed_connect_to_node_attempts_total"

	// MetricUserMaxConcurrentSessionsHit counts number of times a user exceeded their max concurrent ssh connections.
	MetricUserMaxConcurrentSessionsHit = "user_max_concurrent_sessions_hit_total"

	// MetricProxyConnectionLimitHit counts the number of times the proxy connection limit was exceeded.
	MetricProxyConnectionLimitHit = "proxy_connection_limit_exceeded_total"

	// MetricUserLoginCount counts user logins.
	MetricUserLoginCount = "user_login_total"

	// MetricHeartbeatConnectionsReceived counts heartbeat connections received by auth.
	MetricHeartbeatConnectionsReceived = "heartbeat_connections_received_total"

	// MetricCertificateMismatch counts login failures due to certificate mismatch.
	MetricCertificateMismatch = "certificate_mismatch_total"

	// MetricHeartbeatsMissed counts the nodes that failed to heartbeat.
	MetricHeartbeatsMissed = "heartbeats_missed_total"

	// MetricWatcherEventsEmitted counts watcher events that are emitted.
	MetricWatcherEventsEmitted = "watcher_events"

	// MetricWatcherEventSizes measures the size of watcher events that are emitted.
	MetricWatcherEventSizes = "watcher_event_sizes"

	// MetricMissingSSHTunnels returns the number of missing SSH tunnels for this proxy.
	MetricMissingSSHTunnels = "proxy_missing_ssh_tunnels"

	// MetricMigrations tracks for each migration if it is active or not.
	MetricMigrations = "migrations"

	// TagMigration is a metric tag for a migration.
	TagMigration = "migration"

	// MetricIncompleteSessionUploads returns the number of incomplete session uploads.
	MetricIncompleteSessionUploads = "incomplete_session_uploads_total"

	// TagCluster is a metric tag for a cluster.
	TagCluster = "cluster"

	// MetricTotalInstances provides an instance count.
	MetricTotalInstances = "total_instances"

	// MetricEnrolledInUpgrades provides total number of instances that advertise an upgrader.
	MetricEnrolledInUpgrades = "enrolled_in_upgrades"

	// MetricUpgraderCounts provides instance count per-upgrader.
	MetricUpgraderCounts = "upgrader_counts"

	// TagUpgrader is a metric tag for upgraders.
	TagUpgrader = "upgrader"

	// MetricAccessRequestsCreated provides total number of created access requests.
	MetricAccessRequestsCreated = "access_requests_created"

	// TagRoles is a number of roles requested as a part of access request.
	TagRoles = "roles"

	// TagResources is a number of resources requested as a part of access request.
	TagResources = "resources"

	// MetricUserCertificatesGenerated provides total number of user certificates generated.
	MetricUserCertificatesGenerated = "user_certificates_generated"

	// TagPrivateKeyPolicy is a private key policy associated with a user's certificates.
	TagPrivateKeyPolicy = "private_key_policy"
)
// Names of metrics describing the running process (CPU time, file
// descriptors, resident memory and start time).
const (
	// MetricProcessCPUSecondsTotal measures CPU seconds consumed by process.
	MetricProcessCPUSecondsTotal = "process_cpu_seconds_total"

	// MetricProcessMaxFDs shows maximum amount of file descriptors allowed for the process.
	MetricProcessMaxFDs = "process_max_fds"

	// MetricProcessOpenFDs shows process open file descriptors.
	MetricProcessOpenFDs = "process_open_fds"

	// MetricProcessResidentMemoryBytes measures bytes consumed by process resident memory.
	MetricProcessResidentMemoryBytes = "process_resident_memory_bytes"

	// MetricProcessStartTimeSeconds measures process start time.
	MetricProcessStartTimeSeconds = "process_start_time_seconds"
)
// Names of metrics describing the Go runtime (threads, goroutines, version
// info and memory statistics).
const (
	// MetricGoThreads is amount of system threads used by Go runtime.
	MetricGoThreads = "go_threads"

	// MetricGoGoroutines measures current number of goroutines.
	MetricGoGoroutines = "go_goroutines"

	// MetricGoInfo provides information about Go runtime version.
	MetricGoInfo = "go_info"

	// MetricGoAllocBytes measures allocated memory bytes.
	MetricGoAllocBytes = "go_memstats_alloc_bytes"

	// MetricGoHeapAllocBytes measures heap bytes allocated by Go runtime.
	MetricGoHeapAllocBytes = "go_memstats_heap_alloc_bytes"

	// MetricGoHeapObjects measures count of heap objects created by Go runtime.
	MetricGoHeapObjects = "go_memstats_heap_objects"
)
// Names of backend, BPF, cache, registration and process-state metrics,
// followed by the tag (label) names and tag values used with them.
const (
	// MetricBackendWatchers is a metric with backend watchers.
	MetricBackendWatchers = "backend_watchers_total"

	// MetricBackendWatcherQueues is a metric with backend watcher queues sizes.
	MetricBackendWatcherQueues = "backend_watcher_queues_total"

	// MetricBackendRequests measures count of backend requests.
	MetricBackendRequests = "backend_requests"

	// MetricBackendReadHistogram measures histogram of backend read latencies.
	MetricBackendReadHistogram = "backend_read_seconds"

	// MetricBackendWriteHistogram measures histogram of backend write latencies.
	MetricBackendWriteHistogram = "backend_write_seconds"

	// MetricBackendBatchWriteHistogram measures histogram of backend batch write latencies.
	MetricBackendBatchWriteHistogram = "backend_batch_write_seconds"

	// MetricBackendBatchReadHistogram measures histogram of backend batch read latencies.
	MetricBackendBatchReadHistogram = "backend_batch_read_seconds"

	// MetricBackendWriteRequests measures backend write requests count.
	MetricBackendWriteRequests = "backend_write_requests_total"

	// MetricBackendWrites tallies all individual backend writes (this is distinct from backend write
	// requests in that bulk writes count as multiple writes).
	MetricBackendWrites = "backend_writes_total"

	// MetricBackendWriteFailedRequests measures failed backend write requests count.
	MetricBackendWriteFailedRequests = "backend_write_requests_failed_total"

	// MetricBackendWriteFailedPreconditionRequests measures the portion of failed backend write requests
	// that failed due to a custom precondition (existence, revision, value, etc).
	MetricBackendWriteFailedPreconditionRequests = "backend_write_requests_failed_precondition_total"

	// MetricBackendAtomicWriteRequests measures backend atomic write requests count.
	MetricBackendAtomicWriteRequests = "backend_atomic_write_requests_total"

	// MetricBackendAtomicWriteFailedRequests measures failed backend atomic write requests count.
	MetricBackendAtomicWriteFailedRequests = "backend_atomic_write_requests_failed_total"

	// MetricBackendAtomicWriteConditionFailed measures the amount of atomic write requests that result in condition failure.
	MetricBackendAtomicWriteConditionFailed = "backend_atomic_write_condition_failed_total"

	// MetricBackendAtomicWriteHistogram measures histogram of backend atomic write latencies.
	MetricBackendAtomicWriteHistogram = "backend_atomic_write_seconds"

	// MetricBackendAtomicWriteSize measures the histogram of atomic write batch sizes.
	MetricBackendAtomicWriteSize = "backend_atomic_write_size"

	// MetricBackendAtomicWriteContention counts the amount of times atomic writes experience internal retries due to contention.
	MetricBackendAtomicWriteContention = "backend_atomic_write_contention"

	// MetricBackendBatchWriteRequests measures batch backend writes count.
	MetricBackendBatchWriteRequests = "backend_batch_write_requests_total"

	// MetricBackendBatchFailedWriteRequests measures failed batch backend requests count.
	MetricBackendBatchFailedWriteRequests = "backend_batch_write_requests_failed_total"

	// MetricBackendReadRequests measures backend read requests count.
	MetricBackendReadRequests = "backend_read_requests_total"

	// MetricBackendReads tallies all individual backend reads (this is distinct from backend read
	// requests in that bulk reads count as multiple reads).
	MetricBackendReads = "backend_reads_total"

	// MetricBackendFailedReadRequests measures failed backend read requests count.
	MetricBackendFailedReadRequests = "backend_read_requests_failed_total"

	// MetricBackendBatchReadRequests measures batch backend read requests count.
	MetricBackendBatchReadRequests = "backend_batch_read_requests_total"

	// MetricBackendBatchFailedReadRequests measures failed backend batch read requests count.
	MetricBackendBatchFailedReadRequests = "backend_batch_read_requests_failed_total"

	// MetricLostCommandEvents measures the number of command events that were lost.
	MetricLostCommandEvents = "bpf_lost_command_events"

	// MetricLostDiskEvents measures the number of disk events that were lost.
	MetricLostDiskEvents = "bpf_lost_disk_events"

	// MetricLostNetworkEvents measures the number of network events that were lost.
	MetricLostNetworkEvents = "bpf_lost_network_events"

	// MetricLostRestrictedEvents measures the number of restricted events that were lost.
	MetricLostRestrictedEvents = "bpf_lost_restricted_events"

	// MetricState tracks the state of the teleport process.
	MetricState = "process_state"

	// MetricNamespace defines the teleport prometheus namespace.
	MetricNamespace = "teleport"

	// MetricConnectedResources tracks the number and type of resources connected via keepalives.
	MetricConnectedResources = "connected_resources"

	// MetricBuildInfo tracks build information.
	MetricBuildInfo = "build_info"

	// MetricCacheEventsReceived tracks the total number of events received by a cache.
	MetricCacheEventsReceived = "cache_events"

	// MetricStaleCacheEventsReceived tracks the number of stale events received by a cache.
	MetricStaleCacheEventsReceived = "cache_stale_events"

	// MetricRegisteredServers tracks the number of Teleport servers that have successfully registered with the Teleport cluster and have not reached the end of their ttl.
	MetricRegisteredServers = "registered_servers"

	// MetricRegisteredServersByInstallMethods tracks the number of Teleport servers, and their installation method,
	// that have successfully registered with the Teleport cluster and have not reached the end of their ttl.
	MetricRegisteredServersByInstallMethods = "registered_servers_by_install_methods"

	// MetricReverseSSHTunnels defines the number of connected SSH reverse tunnels to the proxy.
	MetricReverseSSHTunnels = "reverse_tunnels_connected"

	// MetricHostedPluginStatus tracks the current status
	// (as defined by types.PluginStatus) for a plugin instance.
	MetricHostedPluginStatus = "hosted_plugin_status"

	// MetricTeleportServices tracks which services are currently running in the current Teleport Process.
	MetricTeleportServices = "services"

	// TagRange is a tag specifying backend requests.
	TagRange = "range"

	// TagReq is a tag specifying backend request type.
	TagReq = "req"

	// TagTrue is a tag value to mark true values.
	TagTrue = "true"

	// TagFalse is a tag value to mark false values.
	TagFalse = "false"

	// TagResource is a tag specifying the resource for an event.
	TagResource = "resource"

	// TagVersion is a prometheus label for version of Teleport built.
	TagVersion = "version"

	// TagGitref is a prometheus label for the gitref of Teleport built.
	TagGitref = "gitref"

	// TagGoVersion is a prometheus label for version of Go used to build Teleport.
	TagGoVersion = "goversion"

	// TagCacheComponent is a prometheus label for the cache component.
	TagCacheComponent = "cache_component"

	// TagType is a prometheus label for type of resource or tunnel connected.
	TagType = "type"

	// TagServer is a prometheus label to indicate what server the metric is tied to.
	TagServer = "server"

	// TagClient is a prometheus label to indicate what client the metric is tied to.
	TagClient = "client"

	// TagInstallMethods is a prometheus label to indicate what installation methods
	// were used for the agent.
	// This value comes from UpstreamInventoryAgentMetadata (sourced in lib/inventory/metadata.fetchInstallMethods).
	TagInstallMethods = "install_methods"

	// TagServiceName is the prometheus label to indicate what services are running in the current proxy.
	// Those services are monitored using the Supervisor.
	// Only a subset of services are monitored. See [lib/service.metricsServicesRunningMap]
	// Eg, discovery_service
	TagServiceName = "service_name"

	// TagAutomaticUpdates is a prometheus label to indicate whether the instance
	// is enrolled in automatic updates.
	TagAutomaticUpdates = "automatic_updates"
)
// Names of metrics describing usage event generation, batching and
// submission.
const (
	// MetricUsageEventsSubmitted is a count of usage events that have been generated.
	MetricUsageEventsSubmitted = "usage_events_submitted_total"

	// MetricUsageBatches is a count of batches enqueued for submission.
	MetricUsageBatches = "usage_batches_total"

	// MetricUsageEventsRequeued is a count of events that were requeued after a
	// submission failed.
	MetricUsageEventsRequeued = "usage_events_requeued_total"

	// MetricUsageBatchSubmissionDuration is a histogram of durations it took to
	// submit a batch.
	MetricUsageBatchSubmissionDuration = "usage_batch_submission_duration_seconds"

	// MetricUsageBatchesSubmitted is a count of event batches successfully
	// submitted.
	MetricUsageBatchesSubmitted = "usage_batch_submitted_total"

	// MetricUsageBatchesFailed is a count of event batches that failed to
	// submit.
	MetricUsageBatchesFailed = "usage_batch_failed_total"

	// MetricUsageEventsDropped is a count of events dropped due to the
	// submission buffer reaching a length limit.
	MetricUsageEventsDropped = "usage_events_dropped_total"
)
// Athena audit log metrics.
const (
	// MetricParquetlogConsumerBatchPorcessingDuration is a histogram of durations it
	// took to process single batch of events.
	//
	// NOTE: the identifier misspells "Processing"; the name is exported and is
	// kept as-is so existing references keep compiling.
	MetricParquetlogConsumerBatchPorcessingDuration = "audit_parquetlog_batch_processing_seconds"

	// MetricParquetlogConsumerS3FlushDuration is a histogram of durations it took to
	// flush and close parquet files on s3.
	MetricParquetlogConsumerS3FlushDuration = "audit_parquetlog_s3_flush_seconds"

	// MetricParquetlogConsumerDeleteEventsDuration is a histogram of durations it
	// took to delete events from SQS.
	MetricParquetlogConsumerDeleteEventsDuration = "audit_parquetlog_delete_events_seconds"

	// MetricParquetlogConsumerBatchSize is a histogram of sizes of single batch of events.
	MetricParquetlogConsumerBatchSize = "audit_parquetlog_batch_size"

	// MetricParquetlogConsumerBatchCount is a count of number of events in single batch.
	MetricParquetlogConsumerBatchCount = "audit_parquetlog_batch_count"

	// MetricParquetlogConsumerLastProcessedTimestamp is a timestamp of last finished consumer execution.
	MetricParquetlogConsumerLastProcessedTimestamp = "audit_parquetlog_last_processed_timestamp"

	// MetricParquetlogConsumerOldestProcessedMessage is age of oldest processed message.
	MetricParquetlogConsumerOldestProcessedMessage = "audit_parquetlog_age_oldest_processed_message"

	// MetricParquetlogConsumerCollectFailed is a count of number of errors received from sqs collect.
	MetricParquetlogConsumerCollectFailed = "audit_parquetlog_errors_from_collect_count"
)