Add prometheus metrics collection to load test (#10591)

This adds a Prometheus instance with remote write to the load test and persists results to a remote Prometheus target. It also adds labels to the auth pods to differentiate the backend being tested, as well as annotations to indicate that the pods should be scraped for metrics.
This commit is contained in:
Carson Anderson 2022-03-03 10:42:55 -07:00 committed by GitHub
parent 18a7696442
commit caadbca811
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 325 additions and 5 deletions

View file

@ -115,7 +115,7 @@ install-teleport: install-auth install-proxy install-node install-iot-node
# Tears down the Teleport deployment by running the delete-tc, delete-nodes,
# delete-proxy and delete-auth targets; the target has no recipe of its own.
.PHONY: delete-teleport
delete-teleport: delete-tc delete-nodes delete-proxy delete-auth
# installs grafana and influxdb
# installs grafana, influxdb, and prometheus
.PHONY: install-monitor
install-monitor:
kubectl create configmap grafana-config -n loadtest \
@ -128,13 +128,16 @@ install-monitor:
kubectl apply -f influxdb.yaml
@make expand-yaml FILENAME=grafana
kubectl apply -f grafana-gen.yaml
@make expand-yaml FILENAME=prometheus
kubectl apply -f prometheus-gen.yaml
# Deletes the monitoring stack: everything defined in influxdb.yaml,
# grafana-gen.yaml and prometheus-gen.yaml, plus the grafana-config configmap
# in the loadtest namespace. --ignore-not-found keeps the target from failing
# when a resource has already been removed.
.PHONY: delete-monitor
delete-monitor:
kubectl delete -f influxdb.yaml --ignore-not-found
kubectl delete -f grafana-gen.yaml --ignore-not-found
kubectl delete configmap grafana-config -n loadtest --ignore-not-found
kubectl delete -f prometheus-gen.yaml --ignore-not-found
# installs an etcd cluster
.PHONY: install-etcd

View file

@ -14,6 +14,9 @@ spec:
metadata:
  labels:
    teleport-role: auth
    # Identifies the storage backend under test; the scrape job's labelmap rule
    # copies pod labels onto every series, so results can be split by backend.
    backend: etcd
  # The kubernetes-pods scrape job selects targets and picks the port from pod
  # *annotations* (__meta_kubernetes_pod_annotation_prometheus_io_scrape / _port),
  # not labels, so these keys must live here for the pod to be scraped.
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "3434"
spec:
volumes:
- name: config

View file

@ -14,6 +14,9 @@ spec:
metadata:
  labels:
    teleport-role: auth
    # Identifies the storage backend under test; the scrape job's labelmap rule
    # copies pod labels onto every series, so results can be split by backend.
    backend: firestore
  # The kubernetes-pods scrape job selects targets and picks the port from pod
  # *annotations* (__meta_kubernetes_pod_annotation_prometheus_io_scrape / _port),
  # not labels, so these keys must live here for the pod to be scraped.
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "3434"
spec:
volumes:
- name: config

View file

@ -0,0 +1,291 @@
---
# Namespace that holds the namespaced Prometheus resources of the load test.
kind: Namespace
apiVersion: v1
metadata:
  name: prometheus-loadtest
---
# Source: prometheus/templates/server/serviceaccount.yaml
# Identity the Prometheus server pod runs under (referenced by the
# Deployment's serviceAccountName and by the ClusterRoleBinding subject).
kind: ServiceAccount
apiVersion: v1
metadata:
  name: prometheus-server
  namespace: prometheus-loadtest
  labels:
    app: prometheus
    component: server
    release: prometheus
    chart: prometheus-15.4.0
    heritage: Helm
  annotations: {}
---
# Source: prometheus/templates/server/cm.yaml
# Configuration files mounted into the Prometheus server at /etc/config.
#
# ${PROM_REMOTE_URL}, ${PROM_USER} and ${PROM_PASSWORD} are placeholders;
# presumably they are filled in by `make expand-yaml FILENAME=prometheus`
# before the generated manifest is applied -- TODO confirm.
# NOTE(review): after substitution the remote-write password is stored in a
# plain ConfigMap; consider a Secret plus `password_file` if that matters.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server
  namespace: prometheus-loadtest
  labels:
    component: "server"
    app: prometheus
    release: prometheus
    chart: prometheus-15.4.0
    heritage: Helm
data:
  # No alerting rules are defined.
  alerting_rules.yml: |
    {}
  alerts: |
    {}
  # Main server configuration.
  #
  # remote_write: ships every scraped sample to the remote endpoint using
  # basic auth. The substituted values are single-quoted so that a URL, user
  # name or password that starts with a YAML indicator (*, &, !, %, @, ...)
  # or contains ": " / " #" is still read as a plain string.
  #
  # scrape_configs / kubernetes-pods, in order:
  #   1. keep only pods annotated prometheus.io/scrape: "true"
  #   2. drop pods annotated prometheus.io/scrape_slow: "true"
  #   3. honour the prometheus.io/scheme, /path and /port annotations
  #   4. map prometheus.io/param_<name> annotations to URL parameters
  #   5. copy all pod labels onto the target
  #   6. add `namespace` and `pod` labels
  #   7. drop pods whose phase is Pending, Succeeded, Failed or Completed
  prometheus.yml: |
    global:
      evaluation_interval: 1m
      scrape_interval: 1m
      scrape_timeout: 10s
    remote_write:
    - url: '${PROM_REMOTE_URL}'
      basic_auth:
        username: '${PROM_USER}'
        password: '${PROM_PASSWORD}'
    rule_files:
    - /etc/config/recording_rules.yml
    - /etc/config/alerting_rules.yml
    - /etc/config/rules
    - /etc/config/alerts
    scrape_configs:
    - job_name: kubernetes-pods
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_scrape
      - action: drop
        regex: true
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_scrape_slow
      - action: replace
        regex: (https?)
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_scheme
        target_label: __scheme__
      - action: replace
        regex: (.+)
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_path
        target_label: __metrics_path__
      - action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        source_labels:
        - __address__
        - __meta_kubernetes_pod_annotation_prometheus_io_port
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
        replacement: __param_$1
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: namespace
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_name
        target_label: pod
      - action: drop
        regex: Pending|Succeeded|Failed|Completed
        source_labels:
        - __meta_kubernetes_pod_phase
  # No recording rules are defined.
  recording_rules.yml: |
    {}
  rules: |
    {}
---
# Source: prometheus/templates/server/clusterrole.yaml
# Cluster-wide read-only access for the Prometheus server: get/list/watch on
# the listed resources, plus GET on the /metrics non-resource URL.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: prometheus-server
  labels:
    app: prometheus
    component: server
    release: prometheus
    chart: prometheus-15.4.0
    heritage: Helm
rules:
  # Core API group.
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - nodes/metrics
      - services
      - endpoints
      - pods
      - ingresses
      - configmaps
    verbs: [get, list, watch]
  # Ingress objects in both API groups that serve them.
  - apiGroups: [extensions, networking.k8s.io]
    resources:
      - ingresses/status
      - ingresses
    verbs: [get, list, watch]
  - nonResourceURLs: ["/metrics"]
    verbs: [get]
---
# Source: prometheus/templates/server/clusterrolebinding.yaml
# Grants the prometheus-server ClusterRole to the prometheus-server
# ServiceAccount in the prometheus-loadtest namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: prometheus-server
  labels:
    app: prometheus
    component: server
    release: prometheus
    chart: prometheus-15.4.0
    heritage: Helm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-server
subjects:
  - kind: ServiceAccount
    name: prometheus-server
    namespace: prometheus-loadtest
---
# Source: prometheus/templates/server/service.yaml
# In-cluster endpoint for the Prometheus server: port 80 on the Service is
# forwarded to port 9090 on the selected pods.
kind: Service
apiVersion: v1
metadata:
  name: prometheus-server
  namespace: prometheus-loadtest
  labels:
    app: prometheus
    component: server
    release: prometheus
    chart: prometheus-15.4.0
    heritage: Helm
spec:
  type: ClusterIP
  sessionAffinity: None
  selector:
    app: prometheus
    component: server
    release: prometheus
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 9090
---
# Source: prometheus/templates/server/deploy.yaml
# Single-replica Prometheus server. The pod runs two containers that share the
# prometheus-server ConfigMap: a config reloader and Prometheus itself.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: prometheus-server
  namespace: prometheus-loadtest
  labels:
    app: prometheus
    component: server
    release: prometheus
    chart: prometheus-15.4.0
    heritage: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
      release: prometheus
  template:
    metadata:
      labels:
        app: prometheus
        component: server
        release: prometheus
        chart: prometheus-15.4.0
        heritage: Helm
    spec:
      serviceAccountName: prometheus-server
      enableServiceLinks: true
      hostNetwork: false
      dnsPolicy: ClusterFirst
      terminationGracePeriodSeconds: 300
      # Everything runs as the unprivileged uid/gid 65534.
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        runAsGroup: 65534
        fsGroup: 65534
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-server
        # TSDB data lives in an emptyDir volume.
        - name: storage-volume
          emptyDir: {}
      containers:
        # Watches /etc/config and calls Prometheus' reload endpoint
        # (enabled by --web.enable-lifecycle on the server container).
        - name: prometheus-server-configmap-reload
          image: 'jimmidyson/configmap-reload:v0.5.0'
          imagePullPolicy: IfNotPresent
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://127.0.0.1:9090/-/reload
          resources: {}
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
        # The Prometheus server itself.
        - name: prometheus-server
          image: 'quay.io/prometheus/prometheus:v2.31.1'
          imagePullPolicy: IfNotPresent
          args:
            - --storage.tsdb.retention.time=15d
            - --config.file=/etc/config/prometheus.yml
            - --storage.tsdb.path=/data
            - --web.console.libraries=/etc/prometheus/console_libraries
            - --web.console.templates=/etc/prometheus/consoles
            - --web.enable-lifecycle
          ports:
            - containerPort: 9090
          readinessProbe:
            httpGet:
              scheme: HTTP
              port: 9090
              path: /-/ready
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 4
            successThreshold: 1
            failureThreshold: 3
          livenessProbe:
            httpGet:
              scheme: HTTP
              port: 9090
              path: /-/healthy
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          resources: {}
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
            - name: storage-volume
              mountPath: /data
              subPath: ""

View file

@ -14,6 +14,8 @@ spec:
metadata:
  labels:
    teleport-role: proxy
  # The kubernetes-pods scrape job selects targets and picks the port from pod
  # *annotations* (__meta_kubernetes_pod_annotation_prometheus_io_scrape / _port),
  # not labels, so these keys must live here for the pod to be scraped.
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "3434"
spec:
volumes:
- name: config

View file

@ -23,6 +23,21 @@ env:
exit 1; \
fi
@if [ -z ${PROM_REMOTE_URL} ]; then \
echo "PROM_REMOTE_URL is not set, cannot apply cluster."; \
exit 1; \
fi
@if [ -z ${PROM_USER} ]; then \
echo "PROM_USER is not set, cannot apply cluster."; \
exit 1; \
fi
@if [ -z ${PROM_PASSWORD} ]; then \
echo "PROM_PASSWORD is not set, cannot apply cluster."; \
exit 1; \
fi
@echo PROXY_IP=$(shell make -C ../../network get-proxy-ip) > secrets.env
@echo PROXY_HOST=${PROXY_HOST} >> secrets.env
@echo GRAFANA_IP=$(shell make -C ../../network get-grafana-ip) >> secrets.env
@ -31,6 +46,9 @@ env:
@echo PROXY_TOKEN=$(shell cat proxy-token) >> secrets.env
@echo TC_TOKEN=$(shell cat tc-token) >> secrets.env
@echo GCP_PROJECT=$(shell make -C ../../cluster get-project) >> secrets.env
@echo PROM_REMOTE_URL=${PROM_REMOTE_URL} >> secrets.env
@echo PROM_USER=${PROM_USER} >> secrets.env
@echo PROM_PASSWORD=${PROM_PASSWORD} >> secrets.env
# Writes 32 random bytes, base64-encoded with the trailing newline stripped,
# to the file ./grafana-pass (presumably the Grafana password -- confirm
# against the targets that read this file).
grafana-pass:
openssl rand -base64 32 | tr -d '\n' > grafana-pass