mirror of
https://github.com/gravitational/teleport
synced 2024-10-18 16:24:03 +00:00
Metrics: add running services metric (#34999)
* Metrics: add running services metric. This PR adds a new gauge metric: `teleport_services`. This metric has a label identifying the service, and its value indicates whether or not it is running. Those services are the ones started in the supervisor, e.g., proxy.web, discovery.init, ssh.node, auth.tls. When the service stops, the gauge is decremented. This gives us an overview of the currently running services in the process. * Consider only a subset of services * use friendly name for service names * improve metric's help message * Update lib/service/supervisor.go Co-authored-by: rosstimothy <39066650+rosstimothy@users.noreply.github.com> * Fix service names --------- Co-authored-by: rosstimothy <39066650+rosstimothy@users.noreply.github.com>
This commit is contained in:
parent
88fa225dd4
commit
ab6cf95459
|
@ -25,9 +25,11 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/gravitational/trace"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/gravitational/teleport"
|
||||
"github.com/gravitational/teleport/lib/observability/metrics"
|
||||
)
|
||||
|
||||
// Supervisor implements the simple service logic - registering
|
||||
|
@ -273,11 +275,38 @@ type ExitEventPayload struct {
|
|||
Error error
|
||||
}
|
||||
|
||||
// metricsServicesRunning is a gauge vector with a single label,
// teleport.TagServiceName. LocalSupervisor.serve increments the gauge for a
// service's label when the service goroutine starts and decrements it (via
// defer) when that goroutine returns. The collector is registered in
// LocalSupervisor.Start before any service is served.
var metricsServicesRunning = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: teleport.MetricNamespace,
|
||||
Name: teleport.MetricTeleportServices,
|
||||
Help: "Teleport services currently enabled and running",
|
||||
},
|
||||
[]string{teleport.TagServiceName},
|
||||
)
|
||||
// metricsServicesRunningMap maps a supervisor service name (the value returned
// by Service.Name) to the friendly label value reported on
// metricsServicesRunning. Only services whose name is a key in this map are
// counted; LocalSupervisor.serve leaves the gauge untouched for any other
// service.
var metricsServicesRunningMap = map[string]string{
|
||||
"discovery.init": "discovery_service",
|
||||
"ssh.node": "ssh_service",
|
||||
"auth.tls": "auth_service",
|
||||
"proxy.web": "proxy_service",
|
||||
"kube.init": "kubernetes_service",
|
||||
"apps.start": "application_service",
|
||||
"db.init": "database_service",
|
||||
"windows_desktop.init": "windows_desktop_service",
|
||||
"okta.init": "okta_service",
|
||||
"jamf.init": "jamf_service",
|
||||
}
|
||||
|
||||
func (s *LocalSupervisor) serve(srv Service) {
|
||||
s.wg.Add(1)
|
||||
go func() {
|
||||
defer s.wg.Done()
|
||||
defer s.RemoveService(srv)
|
||||
|
||||
if label, ok := metricsServicesRunningMap[srv.Name()]; ok {
|
||||
metricsServicesRunning.WithLabelValues(label).Inc()
|
||||
defer metricsServicesRunning.WithLabelValues(label).Dec()
|
||||
}
|
||||
|
||||
l := s.log.WithField("service", srv.Name())
|
||||
l.Debug("Service has started.")
|
||||
err := srv.Serve()
|
||||
|
@ -307,6 +336,10 @@ func (s *LocalSupervisor) Start() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
if err := metrics.RegisterPrometheusCollectors(metricsServicesRunning); err != nil {
|
||||
return trace.Wrap(err)
|
||||
}
|
||||
|
||||
for _, srv := range s.services {
|
||||
s.serve(srv)
|
||||
}
|
||||
|
|
|
@ -244,6 +244,9 @@ const (
|
|||
// (as defined by types.PluginStatus) for a plugin instance
|
||||
MetricHostedPluginStatus = "hosted_plugin_status"
|
||||
|
||||
// MetricTeleportServices tracks which services are currently running in the current Teleport Process.
|
||||
MetricTeleportServices = "services"
|
||||
|
||||
// TagRange is a tag specifying backend requests
|
||||
TagRange = "range"
|
||||
|
||||
|
@ -284,6 +287,12 @@ const (
|
|||
// were used for the agent.
|
||||
// This value comes from UpstreamInventoryAgentMetadata (sourced in lib/inventory/metadata.fetchInstallMethods).
|
||||
TagInstallMethods = "install_methods"
|
||||
|
||||
// TagServiceName is the Prometheus label to indicate what services are running in the current Teleport process.
|
||||
// Those services are monitored using the Supervisor.
|
||||
// Only a subset of services are monitored. See [lib/service.metricsServicesRunningMap]
|
||||
// E.g., discovery_service
|
||||
TagServiceName = "service_name"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
Loading…
Reference in a new issue