monitoring: replace prometheus agent manifests with prometheus stack chart

This commit is contained in:
2025-11-20 15:36:43 -05:00
parent 4e270f7cef
commit 40459d297e
14 changed files with 31 additions and 423 deletions

View File

@@ -0,0 +1,28 @@
apiVersion: v2
name: kube-prometheus-stack
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
appVersion: "1.0"
dependencies:
- name: kube-prometheus-stack
version: 79.6.1
repository: https://prometheus-community.github.io/helm-charts

View File

@@ -1,21 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-agent
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- nodes/metrics
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: ["extensions"]
resources: ["ingresses"]
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]

View File

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus-agent
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-agent
subjects:
- kind: ServiceAccount
name: prometheus-agent
namespace: monitoring

View File

@@ -1,108 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: prom-agent-config
namespace: monitoring
data:
prometheus.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: 'weyma-talos-nodes-kubelet'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
insecure_skip_verify: true
authorization:
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- action: labeldrop
regex: cpu_feature_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_model_migration_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_model_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_timer_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_vendor_node_kubevirt_io_.+
- action: labeldrop
regex: host_model_cpu_node_kubevirt_io_.+
- action: labeldrop
regex: host_model_required_features_node_kubevirt_io_.+
- action: labeldrop
regex: hyperv_node_kubevirt_io_.+
- job_name: 'weyma-talos-nodes-metrics'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
insecure_skip_verify: true
authorization:
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__address__]
regex: (.+):\d+
target_label: __address__
replacement: ${1}:9100
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- action: labeldrop
regex: cpu_feature_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_model_migration_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_model_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_timer_node_kubevirt_io_.+
- action: labeldrop
regex: cpu_vendor_node_kubevirt_io_.+
- action: labeldrop
regex: host_model_cpu_node_kubevirt_io_.+
- action: labeldrop
regex: host_model_required_features_node_kubevirt_io_.+
- action: labeldrop
regex: hyperv_node_kubevirt_io_.+
- job_name: 'weyma-talos-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_label_metrics_enabled]
regex: true
action: keep
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- job_name: 'weyma-talos-rook'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_name]
regex: ^rook-ceph-(exporter|mgr)$
action: keep
- source_labels: [__address__]
regex: ^[^:]+:(9283|9926)$
action: keep
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
remote_write:
- url: "https://10.105.15.20:30104/api/v1/write"
basic_auth:
username: prometheus
password_file: /etc/prometheus/secrets/.basicauthpass
tls_config:
insecure_skip_verify: true

View File

@@ -1,41 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-agent
spec:
replicas: 0
selector:
matchLabels:
app: prometheus-agent
template:
metadata:
labels:
app: prometheus-agent
spec:
serviceAccountName: prometheus-agent
containers:
- name: prometheus
image: prom/prometheus:v3.7.3
args:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--agent"
resources:
requests:
cpu: 200m
memory: 256Mi
limits:
cpu: 500m
memory: 1Gi
volumeMounts:
- name: config-volume
mountPath: /etc/prometheus
- name: auth
mountPath: /etc/prometheus/secrets
volumes:
- name: config-volume
configMap:
name: prom-agent-config
- name: auth
secret:
secretName: prometheus-auth

View File

@@ -1,22 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.9.1
name: node-exporter
rules:
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create

View File

@@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.9.1
name: node-exporter
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: node-exporter
subjects:
- kind: ServiceAccount
name: node-exporter
namespace: monitoring

View File

@@ -1,121 +0,0 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.9.1
name: node-exporter
namespace: monitoring
spec:
selector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
annotations:
kubectl.kubernetes.io/default-container: node-exporter
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.9.1
spec:
automountServiceAccountToken: true
containers:
- args:
- --web.listen-address=127.0.0.1:9100
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --path.udev.data=/host/root/run/udev/data
- --no-collector.wifi
- --no-collector.hwmon
- --no-collector.btrfs
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$
- --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$
image: quay.io/prometheus/node-exporter:v1.9.1
name: node-exporter
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
add:
- SYS_TIME
drop:
- ALL
readOnlyRootFilesystem: true
volumeMounts:
- mountPath: /host/sys
mountPropagation: HostToContainer
name: sys
readOnly: true
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
readOnly: true
- args:
- --secure-listen-address=[$(IP)]:9100
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:9100/
env:
- name: IP
valueFrom:
fieldRef:
fieldPath: status.podIP
image: quay.io/brancz/kube-rbac-proxy:v0.19.1
name: kube-rbac-proxy
ports:
- containerPort: 9100
hostPort: 9100
name: https
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsGroup: 65532
runAsNonRoot: true
runAsUser: 65532
seccompProfile:
type: RuntimeDefault
hostNetwork: true
hostPID: true
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
securityContext:
runAsGroup: 65534
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: node-exporter
tolerations:
- operator: Exists
volumes:
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root
updateStrategy:
rollingUpdate:
maxUnavailable: 10%
type: RollingUpdate

View File

@@ -1,29 +0,0 @@
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.9.1
name: node-exporter
namespace: monitoring
spec:
egress:
- {}
ingress:
- from:
- podSelector:
matchLabels:
app.kubernetes.io/name: prometheus
ports:
- port: 9100
protocol: TCP
podSelector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
policyTypes:
- Egress
- Ingress

View File

@@ -1,19 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.9.1
name: node-exporter
namespace: monitoring
spec:
ports:
- name: https
port: 9100
targetPort: https
selector:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus

View File

@@ -1,11 +0,0 @@
apiVersion: v1
automountServiceAccountToken: false
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.9.1
name: node-exporter
namespace: monitoring

View File

@@ -1,17 +0,0 @@
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: prometheus-auth
spec:
refreshInterval: 1h
secretStoreRef:
name: weyma-vault
kind: ClusterSecretStore
target:
name: prometheus-auth
creationPolicy: Owner
data:
- secretKey: .basicauthpass
remoteRef:
key: monitoring
property: prometheus-password

View File

@@ -1,5 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus-agent
namespace: monitoring

View File

@@ -0,0 +1,3 @@
kube-prometheus-stack:
grafana:
enabled: false # Grafana is instead deployed in its own namespace in the core-apps repo