Merge pull request 'chore(deps): update helm release argo-cd to v9.1.5' (#142) from renovate/argo-cd-9.x into main

Reviewed-on: #142
chore(deps): update helm release argo-cd to v9.1.5
2025-11-30 20:28:29 +00:00 · 2025-11-30 20:00:21 +00:00 · 2025-11-29 19:06:39 -05:00 · 2025-11-29 18:45:28 -05:00 · 2025-11-29 18:38:43 -05:00 · 2025-11-29 00:36:23 +00:00
11 changed files with 66 additions and 406 deletions
--- a/black-start/services/talos-discovery/docker-compose.yaml
+++ b/black-start/services/talos-discovery/docker-compose.yaml
@@ -2,7 +2,7 @@ version: "3.8"
 services:
  discovery:
    restart: unless-stopped
-    image: ghcr.io/siderolabs/discovery-service:v1.0.11
+    image: ghcr.io/siderolabs/discovery-service:v1.0.12
    ports:
      - 10.105.6.215:3000:3000
      - 10.105.6.215:3001:3001
--- a/system-apps/argocd/Chart.yaml
+++ b/system-apps/argocd/Chart.yaml
@@ -24,5 +24,5 @@ appVersion: "1.0"

 dependencies:
 - name: argo-cd
-  version: 9.1.4
+  version: 9.1.5
  repository: https://argoproj.github.io/argo-helm
--- a/system-apps/external-secrets/chart/values.yaml
+++ b/system-apps/external-secrets/chart/values.yaml
@@ -171,7 +171,7 @@ resources: {}

 serviceMonitor:
  # -- Specifies whether to create a ServiceMonitor resource for collecting Prometheus metrics
-  enabled: false
+  enabled: true

  # -- namespace where you want to install ServiceMonitors
  namespace: ""
--- a/system-apps/guestbook/guestbook-ui-deployment.yaml
+++ b/system-apps/guestbook/guestbook-ui-deployment.yaml
@@ -1,21 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: guestbook-ui
-  namespace: guestbook-ui
-spec:
-  replicas: 1
-  revisionHistoryLimit: 3
-  selector:
-    matchLabels:
-      app: guestbook-ui
-  template:
-    metadata:
-      labels:
-        app: guestbook-ui
-    spec:
-      containers:
-      - image: gcr.io/heptio-images/ks-guestbook-demo:0.2
-        name: guestbook-ui
-        ports:
-        - containerPort: 80
--- a/system-apps/guestbook/guestbook-ui-namespace.yaml
+++ b/system-apps/guestbook/guestbook-ui-namespace.yaml
@@ -1,4 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name:  guestbook-ui
--- a/system-apps/guestbook/guestbook-ui-svc.yaml
+++ b/system-apps/guestbook/guestbook-ui-svc.yaml
@@ -1,11 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: guestbook-ui
-  namespace: guestbook-ui
-spec:
-  ports:
-  - port: 80
-    targetPort: 80
-  selector:
-    app: guestbook-ui
--- a/system-apps/metallb/values.yaml
+++ b/system-apps/metallb/values.yaml
@@ -1,354 +1,7 @@
-# Default values for metallb.
-# This is a YAML-formatted file.
-# Declare variables to be passed into your templates.
-
-imagePullSecrets: []
-nameOverride: ""
-fullnameOverride: ""
-loadBalancerClass: ""
-
-# To configure MetalLB, you must specify ONE of the following two
-# options.
-
-rbac:
-  # create specifies whether to install and use RBAC rules.
-  create: true
-
+metallb:
  prometheus:
-  # scrape annotations specifies whether to add Prometheus metric
-  # auto-collection annotations to pods. See
-  # https://github.com/prometheus/prometheus/blob/release-2.1/documentation/examples/prometheus-kubernetes.yml
-  # for a corresponding Prometheus configuration. Alternatively, you
-  # may want to use the Prometheus Operator
-  # (https://github.com/coreos/prometheus-operator) for more powerful
-  # monitoring configuration. If you use the Prometheus operator, this
-  # can be left at false.
-  scrapeAnnotations: false
-
-  # port both controller and speaker will listen on for metrics
-  metricsPort: 7472
-
-  # if set, enables rbac proxy on the controller and speaker to expose
-  # the metrics via tls.
-  # secureMetricsPort: 9120
-
-  # the name of the secret to be mounted in the speaker pod
-  # to expose the metrics securely. If not present, a self signed
-  # certificate to be used.
-  speakerMetricsTLSSecret: ""
-
-  # the name of the secret to be mounted in the controller pod
-  # to expose the metrics securely. If not present, a self signed
-  # certificate to be used.
-  controllerMetricsTLSSecret: ""
-
-  # prometheus doesn't have the permission to scrape all namespaces so we give it permission to scrape metallb's one
-  rbacPrometheus: true
-
-  # the service account used by prometheus
-  # required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
-  serviceAccount: ""
-
-  # the namespace where prometheus is deployed
-  # required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
-  namespace: ""
-
-  # the image to be used for the kuberbacproxy container
-  rbacProxy:
-    repository: gcr.io/kubebuilder/kube-rbac-proxy
-    tag: v0.12.0
-    pullPolicy:
-
-  # Prometheus Operator PodMonitors
+    rbacPrometheus: false
    podMonitor:
-    # enable support for Prometheus Operator
-    enabled: false
-
-    # optional additional labels for podMonitors
-    additionalLabels: {}
-
-    # optional annotations for podMonitors
-    annotations: {}
-
-    # Job label for scrape target
-    jobLabel: "app.kubernetes.io/name"
-
-    # Scrape interval. If not set, the Prometheus default scrape interval is used.
-    interval:
-
-    # 	metric relabel configs to apply to samples before ingestion.
-    metricRelabelings: []
-    # - action: keep
-    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
-    #   sourceLabels: [__name__]
-
-    # 	relabel configs to apply to samples before ingestion.
-    relabelings: []
-    # - sourceLabels: [__meta_kubernetes_pod_node_name]
-    #   separator: ;
-    #   regex: ^(.*)$
-    #   target_label: nodename
-    #   replacement: $1
-    #   action: replace
-
-  # Prometheus Operator ServiceMonitors. To be used as an alternative
-  # to podMonitor, supports secure metrics.
-  serviceMonitor:
-    # enable support for Prometheus Operator
-    enabled: false
-
-    speaker:
-      # optional additional labels for the speaker serviceMonitor
-      additionalLabels: {}
-      # optional additional annotations for the speaker serviceMonitor
-      annotations: {}
-      # optional tls configuration for the speaker serviceMonitor, in case
-      # secure metrics are enabled.
-      tlsConfig:
-        insecureSkipVerify: true
-
-    controller:
-      # optional additional labels for the controller serviceMonitor
-      additionalLabels: {}
-      # optional additional annotations for the controller serviceMonitor
-      annotations: {}
-      # optional tls configuration for the controller serviceMonitor, in case
-      # secure metrics are enabled.
-      tlsConfig:
-        insecureSkipVerify: true
-
-    # Job label for scrape target
-    jobLabel: "app.kubernetes.io/name"
-
-    # Scrape interval. If not set, the Prometheus default scrape interval is used.
-    interval:
-
-    # 	metric relabel configs to apply to samples before ingestion.
-    metricRelabelings: []
-    # - action: keep
-    #   regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
-    #   sourceLabels: [__name__]
-
-    # 	relabel configs to apply to samples before ingestion.
-    relabelings: []
-    # - sourceLabels: [__meta_kubernetes_pod_node_name]
-    #   separator: ;
-    #   regex: ^(.*)$
-    #   target_label: nodename
-    #   replacement: $1
-    #   action: replace
-
-  # Prometheus Operator alertmanager alerts
+      enabled: true
    prometheusRule:
-    # enable alertmanager alerts
-    enabled: false
-
-    # optional additional labels for prometheusRules
-    additionalLabels: {}
-
-    # optional annotations for prometheusRules
-    annotations: {}
-
-    # MetalLBStaleConfig
-    staleConfig:
      enabled: true
-      labels:
-        severity: warning
-
-    # MetalLBConfigNotLoaded
-    configNotLoaded:
-      enabled: true
-      labels:
-        severity: warning
-
-    # MetalLBAddressPoolExhausted
-    addressPoolExhausted:
-      enabled: true
-      labels:
-        severity: critical
-
-    addressPoolUsage:
-      enabled: true
-      thresholds:
-        - percent: 75
-          labels:
-            severity: warning
-        - percent: 85
-          labels:
-            severity: warning
-        - percent: 95
-          labels:
-            severity: critical
-
-    # MetalLBBGPSessionDown
-    bgpSessionDown:
-      enabled: true
-      labels:
-        severity: critical
-
-    extraAlerts: []
-
-# controller contains configuration specific to the MetalLB cluster
-# controller.
-controller:
-  enabled: true
-  # -- Controller log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
-  logLevel: info
-  # command: /controller
-  # webhookMode: enabled
-
-  ## @param controller.updateStrategy.type Metallb controller deployment strategy type.
-  ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy
-  ## e.g:
-  ## strategy:
-  ##  type: RollingUpdate
-  ##  rollingUpdate:
-  ##    maxSurge: 25%
-  ##    maxUnavailable: 25%
-  ##
-  strategy:
-    type: RollingUpdate
-  serviceAccount:
-    # Specifies whether a ServiceAccount should be created
-    create: true
-    # The name of the ServiceAccount to use. If not set and create is
-    # true, a name is generated using the fullname template
-    name: ""
-    annotations: {}
-  securityContext:
-    runAsNonRoot: true
-    # nobody
-    runAsUser: 65534
-    fsGroup: 65534
-  resources: {}
-    # limits:
-      # cpu: 100m
-      # memory: 100Mi
-  nodeSelector: {}
-  tolerations: []
-  priorityClassName: ""
-  runtimeClassName: ""
-  affinity: {}
-  podAnnotations: {}
-  labels: {}
-  livenessProbe:
-    enabled: true
-    failureThreshold: 3
-    initialDelaySeconds: 10
-    periodSeconds: 10
-    successThreshold: 1
-    timeoutSeconds: 1
-  readinessProbe:
-    enabled: true
-    failureThreshold: 3
-    initialDelaySeconds: 10
-    periodSeconds: 10
-    successThreshold: 1
-    timeoutSeconds: 1
-  tlsMinVersion: "VersionTLS12"
-  tlsCipherSuites: ""
-
-  extraContainers: []
-
-# speaker contains configuration specific to the MetalLB speaker
-# daemonset.
-speaker:
-  enabled: true
-  # command: /speaker
-  # -- Speaker log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
-  logLevel: info
-  tolerateMaster: true
-  memberlist:
-    enabled: true
-    mlBindPort: 7946
-    mlBindAddrOverride: ""
-    mlSecretKeyPath: "/etc/ml_secret_key"
-  excludeInterfaces:
-    enabled: true
-  # ignore the exclude-from-external-loadbalancer label
-  ignoreExcludeLB: false
-
-  ## @param speaker.updateStrategy.type Speaker daemonset strategy type
-  ## ref: https://kubernetes.io/docs/tasks/manage-daemon/update-daemon-set/
-  ##
-  updateStrategy:
-    ## StrategyType
-    ## Can be set to RollingUpdate or OnDelete
-    ##
-    type: RollingUpdate
-  serviceAccount:
-    # Specifies whether a ServiceAccount should be created
-    create: true
-    # The name of the ServiceAccount to use. If not set and create is
-    # true, a name is generated using the fullname template
-    name: ""
-    annotations: {}
-  securityContext: {}
-  ## Defines a secret name for the controller to generate a memberlist encryption secret
-  ## By default secretName: {{ "metallb.fullname" }}-memberlist
-  ##
-  # secretName:
-  resources: {}
-    # limits:
-      # cpu: 100m
-      # memory: 100Mi
-  nodeSelector: {}
-  tolerations: []
-  priorityClassName: ""
-  affinity: {}
-  ## Selects which runtime class will be used by the pod.
-  runtimeClassName: ""
-  podAnnotations: {}
-  labels: {}
-  livenessProbe:
-    enabled: true
-    failureThreshold: 3
-    initialDelaySeconds: 10
-    periodSeconds: 10
-    successThreshold: 1
-    timeoutSeconds: 1
-  readinessProbe:
-    enabled: true
-    failureThreshold: 3
-    initialDelaySeconds: 10
-    periodSeconds: 10
-    successThreshold: 1
-    timeoutSeconds: 1
-  startupProbe:
-    enabled: true
-    failureThreshold: 30
-    periodSeconds: 5
-  # frr contains configuration specific to the MetalLB FRR container,
-  # for speaker running alongside FRR.
-  frr:
-    enabled: false
-    metricsPort: 7473
-    resources: {}
-
-    # if set, enables a rbac proxy sidecar container on the speaker to
-    # expose the frr metrics via tls.
-    # secureMetricsPort: 9121
-
-
-  reloader:
-    resources: {}
-
-  frrMetrics:
-    resources: {}
-
-  extraContainers: []
-
-crds:
-  enabled: true
-  validationFailurePolicy: Fail
-
-# frrk8s contains the configuration related to using an frrk8s instance
-# (github.com/metallb/frr-k8s) as the backend for the BGP implementation.
-# This allows configuring additional frr parameters in combination to those
-# applied by MetalLB.
-frrk8s:
-  # if set, enables frrk8s as a backend. This is mutually exclusive to frr
-  # mode.
-  enabled: false
-  external: false
-  namespace: ""
--- a/system-apps/monitoring/Chart.yaml
+++ b/system-apps/monitoring/Chart.yaml
@@ -24,5 +24,5 @@ appVersion: "1.0"

 dependencies:
 - name: kube-prometheus-stack
-  version: 79.7.1
+  version: 79.9.0
  repository: https://prometheus-community.github.io/helm-charts
--- a/system-apps/rook-ceph/operator/templates/rules.yaml
+++ b/system-apps/rook-ceph/operator/templates/rules.yaml
@@ -544,15 +544,6 @@ spec:
          labels:
            severity: "warning"
            type: "ceph_default"
-        - alert: "CephNodeDiskspaceWarning"
-          annotations:
-            description: "Mountpoint {{ "{{" }} $labels.mountpoint {{ "}}" }} on {{ "{{" }} $labels.nodename {{ "}}" }} will be full in less than 5 days based on the 48 hour trailing fill rate."
-            summary: "Host filesystem free space is getting low"
-          expr: "predict_linear(node_filesystem_free_bytes{device=~\"/.*\"}[2d], 3600 * 24 * 5) *on(instance) group_left(nodename) node_uname_info < 0"
-          labels:
-            oid: "1.3.6.1.4.1.50495.1.2.1.8.4"
-            severity: "warning"
-            type: "ceph_default"
        - alert: "CephNodeInconsistentMTU"
          annotations:
            description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}."
--- a/system-apps/traefik/values.yaml
+++ b/system-apps/traefik/values.yaml
@@ -20,8 +20,20 @@ traefik:
    prometheus:
      service:
        enabled: true
+      serviceMonitor:
+        enabled: true
+      prometheusRule:
+        enabled: true
+        rules:
+          - alert: TraefikDown
+            expr: up{job="traefik"} == 0
+            for: 5m
            labels:
-          metrics_enabled: "true"
+              context: traefik
+              severity: warning
+            annotations:
+              summary: "Traefik Down"
+              description: "{{ $labels.pod }} on {{ $labels.nodename }} is down"
  deployment:
    kind: DaemonSet
    additionalContainers:
--- a/system-apps/velero/values.yaml
+++ b/system-apps/velero/values.yaml
@@ -2,8 +2,48 @@ velero:
  backupsEnabled: true
  snapshotsEnabled: false
  metrics:
+    serviceMonitor:
+      enabled: true
+    prometheusRule:
+      enabled: true
+      spec:
+      - alert: VeleroBackupFailed
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has failed
+        expr: |-
+          velero_backup_last_status{schedule!=""} != 1
+        for: 15m
        labels:
-      metrics_enabled: "true"
+          severity: warning
+      - alert: VeleroBackupFailing
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has been failing for the last 12h
+        expr: |-
+          velero_backup_last_status{schedule!=""} != 1
+        for: 12h
+        labels:
+          severity: critical
+      - alert: VeleroNoNewBackup
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has not run successfully in the last 25h
+        expr: |-
+          (
+          (time() - velero_backup_last_successful_timestamp{schedule!=""}) >bool (25 * 3600)
+          or
+          absent(velero_backup_last_successful_timestamp{schedule!=""})
+          ) == 1
+        for: 1h
+        labels:
+          severity: critical
+      - alert: VeleroBackupPartialFailures
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has {{ $value | humanizePercentage }} partialy failed backups
+        expr: |-
+          rate(velero_backup_partial_failure_total{schedule!=""}[25m])
+            / rate(velero_backup_attempt_total{schedule!=""}[25m]) > 0.5
+        for: 15m
+        labels:
+          severity: warning
  configuration:
    backupStorageLocation:
    - name: weyma-truenas
Author	SHA1	Message	Date
williamp	cc858dd8f3	Merge pull request 'chore(deps): update helm release argo-cd to v9.1.5' (#142) from renovate/argo-cd-9.x into main Reviewed-on: #142	2025-11-30 20:28:29 +00:00
Renovate Bot	5d71a0f199	chore(deps): update helm release argo-cd to v9.1.5	2025-11-30 20:00:21 +00:00
William P	a4d2f870d9	rm guestbook (the argo testing/example app)	2025-11-29 19:06:39 -05:00
William P	7136a0f322	velero: add alert rules	2025-11-29 18:45:28 -05:00
William P	c2d6c0c8bb	velero: enable metrics and prometheusrule	2025-11-29 18:38:43 -05:00
williamp	f3c3741409	Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v79.9.0' (#141) from renovate/kube-prometheus-stack-79.x into main Reviewed-on: #141	2025-11-29 00:36:23 +00:00
Renovate Bot	80b7cb2282	chore(deps): update helm release kube-prometheus-stack to v79.9.0	2025-11-28 17:00:22 +00:00
williamp	bf66dd0818	Merge pull request 'chore(deps): update ghcr.io/siderolabs/discovery-service docker tag to v1.0.12' (#140) from renovate/ghcr.io-siderolabs-discovery-service-1.x into main Reviewed-on: #140	2025-11-28 16:08:02 +00:00
William P	eea1c80a27	rook-ceph: rm CephNodeDiskspaceWarning due to improper, non-ceph related alerts	2025-11-28 10:40:41 -05:00
Renovate Bot	612dd16d4b	chore(deps): update ghcr.io/siderolabs/discovery-service docker tag to v1.0.12	2025-11-28 15:00:19 +00:00
williamp	341b402f0e	Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v79.8.2' (#139) from renovate/kube-prometheus-stack-79.x into main Reviewed-on: #139	2025-11-28 01:08:18 +00:00
Renovate Bot	76eaa1dd98	chore(deps): update helm release kube-prometheus-stack to v79.8.2	2025-11-26 02:00:19 +00:00
williamp	a730f43cbd	Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v79.8.1' (#138) from renovate/kube-prometheus-stack-79.x into main Reviewed-on: #138	2025-11-26 00:18:05 +00:00
Renovate Bot	4bd23be552	chore(deps): update helm release kube-prometheus-stack to v79.8.1	2025-11-25 21:00:19 +00:00
William P	6cd4b20970	metallb: no rbacPrometheus	2025-11-24 22:45:00 -05:00
William P	c3c66cb9e3	metallb: fix values	2025-11-24 22:43:52 -05:00
William P	b0fb79f7ea	traefik: fix prometheusrule	2025-11-24 22:39:00 -05:00
William P	624c5c7a8c	traefik: enable monitoring	2025-11-24 22:33:08 -05:00
William P	ebf8f25342	metallb: enable prometheusrules and servicemonitors	2025-11-24 22:31:16 -05:00
William P	87c5d94e0d	external-secrets: enable monitoring	2025-11-24 22:29:47 -05:00