diff --git a/docs/pages/setup/reference/metrics.mdx b/docs/pages/setup/reference/metrics.mdx index b23223fe47e..9692e90bb4c 100644 --- a/docs/pages/setup/reference/metrics.mdx +++ b/docs/pages/setup/reference/metrics.mdx @@ -111,6 +111,7 @@ Now you can see the monitoring information by visiting several endpoints: | `teleport_build_info` | gauge | Teleport | Provides build information of Teleport including gitref (git describe --long --tags), Go version, and Teleport version. The value of this gauge will always be 1. | | `teleport_cache_events` | counter | Teleport | Number of events received by a Teleport service cache. Teleport's Auth Service, Proxy Service, and other services cache incoming events related to their service. | | `teleport_cache_stale_events` | counter | Teleport | Number of stale events received by a Teleport service cache. A high percentage of stale events can indicate a degraded backend. | +| `teleport_connected_resources` | gauge | Teleport Auth | Tracks the number and type of resources connected via keepalives. | | `teleport_registered_servers` | gauge | Teleport Auth | The number of Teleport servers (a server consists of one or more Teleport services) that have connected to the Teleport cluster, including the Teleport version. After disconnecting, a Teleport server has a TTL of 10 minutes, so this value will include servers that have recently disconnected but have not reached their TTL. | | `teleport_reverse_tunnels_connected` | gauge | Teleport Proxy | Number of reverse SSH tunnels connected to the Teleport Proxy Service by Teleport instances. | | `trusted_clusters` | gauge | Teleport | Number of tunnels per state. | diff --git a/lib/auth/grpcserver.go b/lib/auth/grpcserver.go index 167883ba53c..6325188cf3a 100644 --- a/lib/auth/grpcserver.go +++ b/lib/auth/grpcserver.go @@ -77,6 +77,14 @@ var ( Buckets: prometheus.LinearBuckets(0, 100, 20), }, ) + connectedResources = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricConnectedResources, + Help: "Tracks the number and type of resources connected via keepalives", + }, + []string{teleport.TagType}, + ) ) // GRPCServer is GPRC Auth Server API @@ -126,11 +134,6 @@ func (g *GRPCServer) SendKeepAlives(stream proto.AuthService_SendKeepAlivesServe if err != nil { return trace.Wrap(err) } - if firstIteration { - g.Debugf("Got heartbeat connection from %v.", auth.User.GetName()) - heartbeatConnectionsReceived.Inc() - firstIteration = false - } keepAlive, err := stream.Recv() if err == io.EOF { g.Debugf("Connection closed.") @@ -144,6 +147,13 @@ func (g *GRPCServer) SendKeepAlives(stream proto.AuthService_SendKeepAlivesServe if err != nil { return trace.Wrap(err) } + if firstIteration { + g.Debugf("Got heartbeat connection from %v.", auth.User.GetName()) + heartbeatConnectionsReceived.Inc() + connectedResources.WithLabelValues(keepAlive.GetType()).Inc() + defer connectedResources.WithLabelValues(keepAlive.GetType()).Dec() + firstIteration = false + } } } @@ -3752,7 +3762,7 @@ func (cfg *GRPCServerConfig) CheckAndSetDefaults() error { // NewGRPCServer returns a new instance of GRPC server func NewGRPCServer(cfg GRPCServerConfig) (*GRPCServer, error) { - err := utils.RegisterPrometheusCollectors(heartbeatConnectionsReceived, watcherEventsEmitted, watcherEventSizes) + err := utils.RegisterPrometheusCollectors(heartbeatConnectionsReceived, watcherEventsEmitted, watcherEventSizes, connectedResources) if err != nil { return nil, trace.Wrap(err) } diff --git a/metrics.go b/metrics.go index d9647d9fd81..49ed320c614 100644 --- a/metrics.go +++ b/metrics.go @@ -180,6 +180,9 @@ const ( // MetricNamespace defines the teleport prometheus namespace MetricNamespace = "teleport" + // MetricConnectedResources tracks the number and type of resources connected via keepalives + MetricConnectedResources = "connected_resources" + // MetricBuildInfo tracks build information MetricBuildInfo = "build_info"