From 40459d297e267e6fb229c76d32a2f5897788ac8b Mon Sep 17 00:00:00 2001 From: William P Date: Thu, 20 Nov 2025 15:36:43 -0500 Subject: [PATCH] monitoring: replace prometheus agent manifests with prometheus stack chart --- system-apps/monitoring/Chart.yaml | 28 ++++ system-apps/monitoring/clusterrole.yaml | 21 --- .../monitoring/clusterrolebinding.yaml | 12 -- system-apps/monitoring/configmap.yaml | 108 ---------------- system-apps/monitoring/deployment.yaml | 41 ------ .../nodeExporter-clusterRole.yaml | 22 ---- .../nodeExporter-clusterRoleBinding.yaml | 17 --- .../node-exporter/nodeExporter-daemonset.yaml | 121 ------------------ .../nodeExporter-networkPolicy.yaml | 29 ----- .../node-exporter/nodeExporter-service.yaml | 19 --- .../nodeExporter-serviceAccount.yaml | 11 -- system-apps/monitoring/secret.yaml | 17 --- system-apps/monitoring/serviceaccount.yaml | 5 - system-apps/monitoring/values.yaml | 3 + 14 files changed, 31 insertions(+), 423 deletions(-) create mode 100644 system-apps/monitoring/Chart.yaml delete mode 100644 system-apps/monitoring/clusterrole.yaml delete mode 100644 system-apps/monitoring/clusterrolebinding.yaml delete mode 100644 system-apps/monitoring/configmap.yaml delete mode 100644 system-apps/monitoring/deployment.yaml delete mode 100644 system-apps/monitoring/node-exporter/nodeExporter-clusterRole.yaml delete mode 100644 system-apps/monitoring/node-exporter/nodeExporter-clusterRoleBinding.yaml delete mode 100644 system-apps/monitoring/node-exporter/nodeExporter-daemonset.yaml delete mode 100644 system-apps/monitoring/node-exporter/nodeExporter-networkPolicy.yaml delete mode 100644 system-apps/monitoring/node-exporter/nodeExporter-service.yaml delete mode 100644 system-apps/monitoring/node-exporter/nodeExporter-serviceAccount.yaml delete mode 100644 system-apps/monitoring/secret.yaml delete mode 100644 system-apps/monitoring/serviceaccount.yaml create mode 100644 system-apps/monitoring/values.yaml diff --git a/system-apps/monitoring/Chart.yaml b/system-apps/monitoring/Chart.yaml new file mode 100644 index 0000000..a4ad5da --- /dev/null +++ b/system-apps/monitoring/Chart.yaml @@ -0,0 +1,28 @@ +apiVersion: v2 +name: kube-prometheus-stack +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +appVersion: "1.0" + +dependencies: +- name: kube-prometheus-stack + version: 79.6.1 + repository: https://prometheus-community.github.io/helm-charts \ No newline at end of file diff --git a/system-apps/monitoring/clusterrole.yaml b/system-apps/monitoring/clusterrole.yaml deleted file mode 100644 index 489e11c..0000000 --- a/system-apps/monitoring/clusterrole.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: prometheus-agent -rules: - - apiGroups: [""] - resources: - - nodes - - nodes/proxy - - nodes/metrics - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] - - - apiGroups: ["extensions"] - resources: ["ingresses"] - verbs: ["get", "list", "watch"] - - - nonResourceURLs: ["/metrics"] - verbs: ["get"] \ No newline at end of file diff --git a/system-apps/monitoring/clusterrolebinding.yaml b/system-apps/monitoring/clusterrolebinding.yaml deleted file mode 100644 index 2db1bf8..0000000 --- a/system-apps/monitoring/clusterrolebinding.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: prometheus-agent -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus-agent -subjects: - - kind: ServiceAccount - name: prometheus-agent - namespace: monitoring \ No newline at end of file diff --git a/system-apps/monitoring/configmap.yaml b/system-apps/monitoring/configmap.yaml deleted file mode 100644 index 0dcd5d0..0000000 --- a/system-apps/monitoring/configmap.yaml +++ /dev/null @@ -1,108 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: prom-agent-config - namespace: monitoring -data: - prometheus.yml: | - global: - scrape_interval: 15s - scrape_configs: - - job_name: 'weyma-talos-nodes-kubelet' - kubernetes_sd_configs: - - role: node - scheme: https - tls_config: - insecure_skip_verify: true - authorization: - credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - action: labeldrop - regex: cpu_feature_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_model_migration_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_model_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_timer_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_vendor_node_kubevirt_io_.+ - - action: labeldrop - regex: host_model_cpu_node_kubevirt_io_.+ - - action: labeldrop - regex: host_model_required_features_node_kubevirt_io_.+ - - action: labeldrop - regex: hyperv_node_kubevirt_io_.+ - - job_name: 'weyma-talos-nodes-metrics' - kubernetes_sd_configs: - - role: node - scheme: https - tls_config: - insecure_skip_verify: true - authorization: - credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token - relabel_configs: - - source_labels: [__address__] - regex: (.+):\d+ - target_label: __address__ - replacement: ${1}:9100 - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - action: labeldrop - regex: cpu_feature_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_model_migration_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_model_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_timer_node_kubevirt_io_.+ - - action: labeldrop - regex: cpu_vendor_node_kubevirt_io_.+ - - action: labeldrop - regex: host_model_cpu_node_kubevirt_io_.+ - - action: labeldrop - regex: host_model_required_features_node_kubevirt_io_.+ - - action: labeldrop - regex: hyperv_node_kubevirt_io_.+ - - job_name: 'weyma-talos-service-endpoints' - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_metrics_enabled] - regex: true - action: keep - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: service - - job_name: 'weyma-talos-rook' - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - source_labels: [__meta_kubernetes_service_name] - regex: ^rook-ceph-(exporter|mgr)$ - action: keep - - source_labels: [__address__] - regex: ^[^:]+:(9283|9926)$ - action: keep - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: service - remote_write: - - url: "https://10.105.15.20:30104/api/v1/write" - basic_auth: - username: prometheus - password_file: /etc/prometheus/secrets/.basicauthpass - tls_config: - insecure_skip_verify: true \ No newline at end of file diff --git a/system-apps/monitoring/deployment.yaml b/system-apps/monitoring/deployment.yaml deleted file mode 100644 index 898693b..0000000 --- a/system-apps/monitoring/deployment.yaml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: prometheus-agent -spec: - replicas: 0 - selector: - matchLabels: - app: prometheus-agent - template: - metadata: - labels: - app: prometheus-agent - spec: - serviceAccountName: prometheus-agent - containers: - - name: prometheus - image: prom/prometheus:v3.7.3 - args: - - "--config.file=/etc/prometheus/prometheus.yml" - - "--agent" - resources: - requests: - cpu: 200m - memory: 256Mi - limits: - cpu: 500m - memory: 1Gi - volumeMounts: - - name: config-volume - mountPath: /etc/prometheus - - name: auth - mountPath: /etc/prometheus/secrets - volumes: - - name: config-volume - configMap: - name: prom-agent-config - - name: auth - secret: - secretName: prometheus-auth - diff --git a/system-apps/monitoring/node-exporter/nodeExporter-clusterRole.yaml b/system-apps/monitoring/node-exporter/nodeExporter-clusterRole.yaml deleted file mode 100644 index 4e689a8..0000000 --- a/system-apps/monitoring/node-exporter/nodeExporter-clusterRole.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 1.9.1 - name: node-exporter -rules: -- apiGroups: - - authentication.k8s.io - resources: - - tokenreviews - verbs: - - create -- apiGroups: - - authorization.k8s.io - resources: - - subjectaccessreviews - verbs: - - create diff --git a/system-apps/monitoring/node-exporter/nodeExporter-clusterRoleBinding.yaml b/system-apps/monitoring/node-exporter/nodeExporter-clusterRoleBinding.yaml deleted file mode 100644 index 4ed4bd8..0000000 --- a/system-apps/monitoring/node-exporter/nodeExporter-clusterRoleBinding.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 1.9.1 - name: node-exporter -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: node-exporter -subjects: -- kind: ServiceAccount - name: node-exporter - namespace: monitoring diff --git a/system-apps/monitoring/node-exporter/nodeExporter-daemonset.yaml b/system-apps/monitoring/node-exporter/nodeExporter-daemonset.yaml deleted file mode 100644 index e4be35b..0000000 --- a/system-apps/monitoring/node-exporter/nodeExporter-daemonset.yaml +++ /dev/null @@ -1,121 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 1.9.1 - name: node-exporter - namespace: monitoring -spec: - selector: - matchLabels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - template: - metadata: - annotations: - kubectl.kubernetes.io/default-container: node-exporter - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 1.9.1 - spec: - automountServiceAccountToken: true - containers: - - args: - - --web.listen-address=127.0.0.1:9100 - - --path.sysfs=/host/sys - - --path.rootfs=/host/root - - --path.udev.data=/host/root/run/udev/data - - --no-collector.wifi - - --no-collector.hwmon - - --no-collector.btrfs - - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) - - --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$ - - --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$ - image: quay.io/prometheus/node-exporter:v1.9.1 - name: node-exporter - resources: - limits: - cpu: 250m - memory: 180Mi - requests: - cpu: 102m - memory: 180Mi - securityContext: - allowPrivilegeEscalation: false - capabilities: - add: - - SYS_TIME - drop: - - ALL - readOnlyRootFilesystem: true - volumeMounts: - - mountPath: /host/sys - mountPropagation: HostToContainer - name: sys - readOnly: true - - mountPath: /host/root - mountPropagation: HostToContainer - name: root - readOnly: true - - args: - - --secure-listen-address=[$(IP)]:9100 - - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - - --upstream=http://127.0.0.1:9100/ - env: - - name: IP - valueFrom: - fieldRef: - fieldPath: status.podIP - image: quay.io/brancz/kube-rbac-proxy:v0.19.1 - name: kube-rbac-proxy - ports: - - containerPort: 9100 - hostPort: 9100 - name: https - resources: - limits: - cpu: 20m - memory: 40Mi - requests: - cpu: 10m - memory: 20Mi - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsGroup: 65532 - runAsNonRoot: true - runAsUser: 65532 - seccompProfile: - type: RuntimeDefault - hostNetwork: true - hostPID: true - nodeSelector: - kubernetes.io/os: linux - priorityClassName: system-cluster-critical - securityContext: - runAsGroup: 65534 - runAsNonRoot: true - runAsUser: 65534 - serviceAccountName: node-exporter - tolerations: - - operator: Exists - volumes: - - hostPath: - path: /sys - name: sys - - hostPath: - path: / - name: root - updateStrategy: - rollingUpdate: - maxUnavailable: 10% - type: RollingUpdate diff --git a/system-apps/monitoring/node-exporter/nodeExporter-networkPolicy.yaml b/system-apps/monitoring/node-exporter/nodeExporter-networkPolicy.yaml deleted file mode 100644 index 4d45160..0000000 --- a/system-apps/monitoring/node-exporter/nodeExporter-networkPolicy.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 1.9.1 - name: node-exporter - namespace: monitoring -spec: - egress: - - {} - ingress: - - from: - - podSelector: - matchLabels: - app.kubernetes.io/name: prometheus - ports: - - port: 9100 - protocol: TCP - podSelector: - matchLabels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - policyTypes: - - Egress - - Ingress diff --git a/system-apps/monitoring/node-exporter/nodeExporter-service.yaml b/system-apps/monitoring/node-exporter/nodeExporter-service.yaml deleted file mode 100644 index 3da2699..0000000 --- a/system-apps/monitoring/node-exporter/nodeExporter-service.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 1.9.1 - name: node-exporter - namespace: monitoring -spec: - ports: - - name: https - port: 9100 - targetPort: https - selector: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus diff --git a/system-apps/monitoring/node-exporter/nodeExporter-serviceAccount.yaml b/system-apps/monitoring/node-exporter/nodeExporter-serviceAccount.yaml deleted file mode 100644 index e08271b..0000000 --- a/system-apps/monitoring/node-exporter/nodeExporter-serviceAccount.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -automountServiceAccountToken: false -kind: ServiceAccount -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: node-exporter - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 1.9.1 - name: node-exporter - namespace: monitoring diff --git a/system-apps/monitoring/secret.yaml b/system-apps/monitoring/secret.yaml deleted file mode 100644 index bc044ff..0000000 --- a/system-apps/monitoring/secret.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: prometheus-auth -spec: - refreshInterval: 1h - secretStoreRef: - name: weyma-vault - kind: ClusterSecretStore - target: - name: prometheus-auth - creationPolicy: Owner - data: - - secretKey: .basicauthpass - remoteRef: - key: monitoring - property: prometheus-password \ No newline at end of file diff --git a/system-apps/monitoring/serviceaccount.yaml b/system-apps/monitoring/serviceaccount.yaml deleted file mode 100644 index d381af0..0000000 --- a/system-apps/monitoring/serviceaccount.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus-agent - namespace: monitoring \ No newline at end of file diff --git a/system-apps/monitoring/values.yaml b/system-apps/monitoring/values.yaml new file mode 100644 index 0000000..8651654 --- /dev/null +++ b/system-apps/monitoring/values.yaml @@ -0,0 +1,3 @@ +kube-prometheus-stack: + grafana: + enabled: false # Grafana is instead deployed in its own namespace in the core-apps repo \ No newline at end of file