add teleport_connected_resources metric (#9603)

This adds the Prometheus metric teleport_connected_resources. The gauge increments when a keepalive connection is established and decrements whenever that connection is broken or closed.
This commit is contained in:
Carson Anderson 2022-02-16 13:19:28 -07:00 committed by GitHub
parent 55fbd56217
commit 266811f33e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 6 deletions

View file

@ -111,6 +111,7 @@ Now you can see the monitoring information by visiting several endpoints:
| `teleport_build_info` | gauge | Teleport | Provides build information of Teleport including gitref (git describe --long --tags), Go version, and Teleport version. The value of this gauge will always be 1. |
| `teleport_cache_events` | counter | Teleport | Number of events received by a Teleport service cache. Teleport's Auth Service, Proxy Service, and other services cache incoming events related to their service. |
| `teleport_cache_stale_events` | counter | Teleport | Number of stale events received by a Teleport service cache. A high percentage of stale events can indicate a degraded backend. |
| `teleport_connected_resources` | gauge | Teleport Auth | Tracks the number and type of resources connected via keepalives. |
| `teleport_registered_servers` | gauge | Teleport Auth | The number of Teleport servers (a server consists of one or more Teleport services) that have connected to the Teleport cluster, including the Teleport version. After disconnecting, a Teleport server has a TTL of 10 minutes, so this value will include servers that have recently disconnected but have not reached their TTL. |
| `teleport_reverse_tunnels_connected` | gauge | Teleport Proxy | Number of reverse SSH tunnels connected to the Teleport Proxy Service by Teleport instances. |
| `trusted_clusters` | gauge | Teleport | Number of tunnels per state. |

View file

@ -77,6 +77,14 @@ var (
Buckets: prometheus.LinearBuckets(0, 100, 20),
},
)
connectedResources = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: teleport.MetricNamespace,
Name: teleport.MetricConnectedResources,
Help: "Tracks the number and type of resources connected via keepalives",
},
[]string{teleport.TagType},
)
)
// GRPCServer is the gRPC Auth Server API
@ -126,11 +134,6 @@ func (g *GRPCServer) SendKeepAlives(stream proto.AuthService_SendKeepAlivesServe
if err != nil {
return trace.Wrap(err)
}
if firstIteration {
g.Debugf("Got heartbeat connection from %v.", auth.User.GetName())
heartbeatConnectionsReceived.Inc()
firstIteration = false
}
keepAlive, err := stream.Recv()
if err == io.EOF {
g.Debugf("Connection closed.")
@ -144,6 +147,13 @@ func (g *GRPCServer) SendKeepAlives(stream proto.AuthService_SendKeepAlivesServe
if err != nil {
return trace.Wrap(err)
}
if firstIteration {
g.Debugf("Got heartbeat connection from %v.", auth.User.GetName())
heartbeatConnectionsReceived.Inc()
connectedResources.WithLabelValues(keepAlive.GetType()).Inc()
defer connectedResources.WithLabelValues(keepAlive.GetType()).Dec()
firstIteration = false
}
}
}
@ -3752,7 +3762,7 @@ func (cfg *GRPCServerConfig) CheckAndSetDefaults() error {
// NewGRPCServer returns a new instance of GRPC server
func NewGRPCServer(cfg GRPCServerConfig) (*GRPCServer, error) {
err := utils.RegisterPrometheusCollectors(heartbeatConnectionsReceived, watcherEventsEmitted, watcherEventSizes)
err := utils.RegisterPrometheusCollectors(heartbeatConnectionsReceived, watcherEventsEmitted, watcherEventSizes, connectedResources)
if err != nil {
return nil, trace.Wrap(err)
}

View file

@ -180,6 +180,9 @@ const (
// MetricNamespace defines the teleport prometheus namespace
MetricNamespace = "teleport"
// MetricConnectedResources tracks the number and type of resources connected via keepalives
MetricConnectedResources = "connected_resources"
// MetricBuildInfo tracks build information
MetricBuildInfo = "build_info"