From 313323704f9132387ed60b39106861c36d1afdc9 Mon Sep 17 00:00:00 2001
From: Hugo Shaka <hugo.hervieux@goteleport.com>
Date: Wed, 30 Aug 2023 14:57:37 -0400
Subject: [PATCH] [buddy] Add an optional PodMonitor to the teleport-kube-agent
 chart (#31169)

* feat: add in podmonitor to teleport-kube-agent helmchart

* fix: backport fixes to linting and tests to teleport-cluster chart

* docs: document the `podMonitor` field in chart references

* Apply suggestions from code review

* Lint

---------

Co-authored-by: ThameezBo <thameezbo@double.cloud>
---
 .../helm-reference/teleport-cluster.mdx       | 39 +++++++++++++++++
 .../helm-reference/teleport-kube-agent.mdx    | 39 +++++++++++++++++
 .../teleport-cluster/.lint/podmonitor.yaml    |  6 +++
 .../tests/podmonitor_test.yaml                | 13 +++---
 .../teleport-kube-agent/.lint/podmonitor.yaml |  7 +++
 .../templates/podmonitor.yaml                 | 31 +++++++++++++
 .../tests/podmonitor_test.yaml                | 43 +++++++++++++++++++
 .../teleport-kube-agent/values.schema.json    | 24 +++++++++++
 .../chart/teleport-kube-agent/values.yaml     | 15 +++++++
 9 files changed, 211 insertions(+), 6 deletions(-)
 create mode 100644 examples/chart/teleport-cluster/.lint/podmonitor.yaml
 create mode 100644 examples/chart/teleport-kube-agent/.lint/podmonitor.yaml
 create mode 100644 examples/chart/teleport-kube-agent/templates/podmonitor.yaml
 create mode 100644 examples/chart/teleport-kube-agent/tests/podmonitor_test.yaml
diff --git a/docs/pages/reference/helm-reference/teleport-cluster.mdx b/docs/pages/reference/helm-reference/teleport-cluster.mdx
index 10672d313dc..a73c0fb2ef5 100644
--- a/docs/pages/reference/helm-reference/teleport-cluster.mdx
+++ b/docs/pages/reference/helm-reference/teleport-cluster.mdx
@@ -804,6 +804,45 @@ recommended to use one of the other modes and rely on
 to inject your custom configuration.
 </Admonition>
 
+## `podMonitor`
+
+`podMonitor` controls [the PodMonitor CR (from monitoring.coreos.com/v1)
+](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor)
+that monitors the workload (Auth and Proxy Services) deployed by the chart.
+This custom resource configures Prometheus and makes it scrape Teleport metrics.
+
+The CRD is deployed by the prometheus-operator and allows workloads to
+be monitored. You need to deploy the `prometheus-operator`
+in the cluster prior to configuring the `podMonitor` section of the chart. See
+[the prometheus-operator documentation](https://prometheus-operator.dev/docs/prologue/introduction/)
+for setup instructions.
+
+### `podMonitor.enabled`
+
+| Type   | Default value |
+|--------|---------------|
+| `bool` | `false`       |
+
+Whether the chart should deploy a `PodMonitor` resource. This is disabled by
+default as it requires the `PodMonitor` CRD to be installed in the cluster.
+
+### `podMonitor.additionalLabels`
+
+| Type                   | Default value              |
+|------------------------|----------------------------|
+| `object[string]string` | `{"prometheus":"default"}` |
+
+Additional labels to put on the created PodMonitor resource.
+These labels allow a specific Prometheus instance to select the PodMonitor.
+
+### `podMonitor.interval`
+
+| Type     | Default value |
+|----------|---------------|
+| `string` | `30s`         |
+
+`interval` is the interval between two metrics scrapes by Prometheus.
+
 ## `persistence`
 
 <Details title="Read this if using Kubernetes 1.23+ on EKS">
diff --git a/docs/pages/reference/helm-reference/teleport-kube-agent.mdx b/docs/pages/reference/helm-reference/teleport-kube-agent.mdx
index ae60e77ae65..5876a94e81e 100644
--- a/docs/pages/reference/helm-reference/teleport-kube-agent.mdx
+++ b/docs/pages/reference/helm-reference/teleport-kube-agent.mdx
@@ -1080,6 +1080,45 @@ Ensures that this number of replicas is available during voluntary disruptions,
       minAvailable: 1
   ```
 
+## `podMonitor`
+
+`podMonitor` controls [the PodMonitor CR (from monitoring.coreos.com/v1)
+](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor)
+that monitors the workload (the Teleport agent pods) deployed by the chart.
+This custom resource configures Prometheus and makes it scrape Teleport metrics.
+
+The CRD is deployed by the prometheus-operator and allows workloads to
+be monitored. You need to deploy the `prometheus-operator`
+in the cluster prior to configuring the `podMonitor` section of the chart. See
+[the prometheus-operator documentation](https://prometheus-operator.dev/docs/prologue/introduction/)
+for setup instructions.
+
+### `podMonitor.enabled`
+
+| Type   | Default value |
+|--------|---------------|
+| `bool` | `false`       |
+
+Whether the chart should deploy a `PodMonitor` resource. This is disabled by
+default as it requires the `PodMonitor` CRD to be installed in the cluster.
+
+### `podMonitor.additionalLabels`
+
+| Type                   | Default value |
+|------------------------|---------------|
+| `object[string]string` | `{}`          |
+
+Additional labels to put on the created PodMonitor resource.
+These labels allow a specific Prometheus instance to select the PodMonitor.
+
+### `podMonitor.interval`
+
+| Type     | Default value |
+|----------|---------------|
+| `string` | `30s`         |
+
+`interval` is the interval between two metrics scrapes by Prometheus.
+
 ## `clusterRoleName`
 
 | Type     | Default value |
diff --git a/examples/chart/teleport-cluster/.lint/podmonitor.yaml b/examples/chart/teleport-cluster/.lint/podmonitor.yaml
new file mode 100644
index 00000000000..1c263f5e00b
--- /dev/null
+++ b/examples/chart/teleport-cluster/.lint/podmonitor.yaml
@@ -0,0 +1,6 @@
+clusterName: test-kube-cluster-name
+podMonitor:  # enables the PodMonitor and sets each of its documented options
+  enabled: true
+  additionalLabels:
+    prometheus: default
+  interval: 30s
diff --git a/examples/chart/teleport-cluster/tests/podmonitor_test.yaml b/examples/chart/teleport-cluster/tests/podmonitor_test.yaml
index d07ebc9a8bf..ccdf692ef74 100644
--- a/examples/chart/teleport-cluster/tests/podmonitor_test.yaml
+++ b/examples/chart/teleport-cluster/tests/podmonitor_test.yaml
@@ -3,12 +3,15 @@ templates:
   - podmonitor.yaml
 tests:
   - it: does not create a PodMonitor by default
+    set:
+      clusterName: test-kube-cluster-name  # NOTE(review): presumably a required value; confirm
     asserts:
       - hasDocuments:
           count: 0
 
   - it: creates a PodMonitor when enabled
     set:
+      clusterName: test-kube-cluster-name
       podMonitor:
         enabled: true
     asserts:
@@ -19,6 +22,7 @@ tests:
 
   - it: configures scrape interval if provided
     set:
+      clusterName: test-kube-cluster-name
       podMonitor:
         enabled: true
         interval: 2m
@@ -28,12 +32,9 @@ tests:
           value: 2m
 
   - it: wears additional labels if provided
-    set:
-      podMonitor:
-        enabled: true
-        additionalLabels:
-          prometheus: teleport-only
+    values:  # shared lint fixture: enables the PodMonitor with `prometheus: default`
+      - ../.lint/podmonitor.yaml
     asserts:
       - equal:
           path: metadata.labels.prometheus
-          value: teleport-only
+          value: default
diff --git a/examples/chart/teleport-kube-agent/.lint/podmonitor.yaml b/examples/chart/teleport-kube-agent/.lint/podmonitor.yaml
new file mode 100644
index 00000000000..2cdb90bd497
--- /dev/null
+++ b/examples/chart/teleport-kube-agent/.lint/podmonitor.yaml
@@ -0,0 +1,7 @@
+proxyAddr: proxy.example.com:3080
+kubeClusterName: test-kube-cluster-name
+podMonitor:  # enables the PodMonitor and sets each of its documented options
+  enabled: true
+  additionalLabels:
+    prometheus: default
+  interval: 30s
diff --git a/examples/chart/teleport-kube-agent/templates/podmonitor.yaml b/examples/chart/teleport-kube-agent/templates/podmonitor.yaml
new file mode 100644
index 00000000000..6bc0ccdaf51
--- /dev/null
+++ b/examples/chart/teleport-kube-agent/templates/podmonitor.yaml
@@ -0,0 +1,31 @@
+{{- if .Values.podMonitor.enabled -}}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor  # CRD provided by the prometheus-operator
+metadata:
+  name: {{ .Release.Name | quote }}
+  namespace: {{ .Release.Namespace | quote }}
+  {{- with .Values.podMonitor.additionalLabels }}
+  labels:  # emitted only when additionalLabels is non-empty
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  jobLabel: {{ .Release.Name | quote }}
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace | quote }}
+  selector:
+    matchLabels:
+      app: {{ .Release.Name | quote }}
+  podMetricsEndpoints:
+    - port: diag
+      path: /metrics
+      {{- with .Values.podMonitor.interval }}
+      interval: {{ . | quote }}
+      {{- end }}
+  podTargetLabels:  # pod labels copied onto the scraped metrics
+    - "app.kubernetes.io/name"
+    - "app.kubernetes.io/instance"
+    - "app.kubernetes.io/component"
+    - "app.kubernetes.io/version"
+    - "teleport.dev/majorVersion"
+{{- end }}
diff --git a/examples/chart/teleport-kube-agent/tests/podmonitor_test.yaml b/examples/chart/teleport-kube-agent/tests/podmonitor_test.yaml
new file mode 100644
index 00000000000..474f3469767
--- /dev/null
+++ b/examples/chart/teleport-kube-agent/tests/podmonitor_test.yaml
@@ -0,0 +1,43 @@
+suite: PodMonitor
+templates:
+  - podmonitor.yaml
+tests:
+  - it: does not create a PodMonitor by default
+    set:
+      proxyAddr: proxy.example.com:3080
+      kubeClusterName: test-kube-cluster-name
+    asserts:
+      - hasDocuments:
+          count: 0
+
+  - it: creates a PodMonitor when enabled
+    set:
+      proxyAddr: proxy.example.com:3080
+      kubeClusterName: test-kube-cluster-name
+      podMonitor:
+        enabled: true
+    asserts:
+      - hasDocuments:
+          count: 1
+      - isKind:
+          of: PodMonitor
+
+  - it: configures scrape interval if provided
+    set:
+      proxyAddr: proxy.example.com:3080
+      kubeClusterName: test-kube-cluster-name
+      podMonitor:
+        enabled: true
+        interval: 2m
+    asserts:
+      - equal:
+          path: spec.podMetricsEndpoints[0].interval
+          value: 2m
+
+  - it: wears additional labels if provided
+    values:  # shared lint fixture: enables the PodMonitor with `prometheus: default`
+      - ../.lint/podmonitor.yaml
+    asserts:
+      - equal:
+          path: metadata.labels.prometheus
+          value: default
diff --git a/examples/chart/teleport-kube-agent/values.schema.json b/examples/chart/teleport-kube-agent/values.schema.json
index 91e8e0285da..815cf8d5a79 100644
--- a/examples/chart/teleport-kube-agent/values.schema.json
+++ b/examples/chart/teleport-kube-agent/values.schema.json
@@ -23,6 +23,7 @@
         "clusterRoleBindingName",
         "roleName",
         "roleBindingName",
+        "podMonitor",
         "serviceAccountName",
         "secretName",
         "log",
@@ -368,6 +369,29 @@
                 }
             }
         },
+        "podMonitor": {
+            "$id": "#/properties/podMonitor",
+            "type": "object",
+            "required": ["enabled"],
+            "properties": {
+                "enabled": {
+                    "$id": "#/properties/podMonitor/enabled",
+                    "type": "boolean",
+                    "default": false
+                },
+                "additionalLabels": {
+                    "$id": "#/properties/podMonitor/additionalLabels",
+                    "type": "object",
+                    "default": {},
+                    "additionalProperties": {"type": "string"}
+                },
+                "interval": {
+                    "$id": "#/properties/podMonitor/interval",
+                    "type": "string",
+                    "default": "30s"
+                }
+            }
+        },
         "priorityClassName": {
             "$id": "#/properties/priorityClassName",
             "type": "string",
diff --git a/examples/chart/teleport-kube-agent/values.yaml b/examples/chart/teleport-kube-agent/values.yaml
index e4dff0d8ebb..5c381bf1a3e 100644
--- a/examples/chart/teleport-kube-agent/values.yaml
+++ b/examples/chart/teleport-kube-agent/values.yaml
@@ -198,6 +198,21 @@ highAvailability:
     enabled: false
     minAvailable: 1
 
+# podMonitor controls the PodMonitor CR (from monitoring.coreos.com/v1).
+# This CRD is managed by the prometheus-operator and allows workloads to
+# be monitored. For this value to take effect, the `prometheus-operator`
+# must be running in the cluster.
+# See https://prometheus-operator.dev/docs/prologue/introduction/
+podMonitor:
+  # Whether the chart should deploy a PodMonitor.
+  # Disabled by default as it requires the PodMonitor CRD to be installed.
+  enabled: false
+  # additionalLabels to put on the PodMonitor. They let a specific
+  # Prometheus instance select it. Empty by default.
+  additionalLabels: {}
+  # interval is the interval between two metrics scrapes. Defaults to 30s
+  interval: 30s
+
 ################################################################
 # Values that must be provided if using persistent storage for Teleport.
 #