From 7136a0f322f231745da84b1aa489de5469feebf3 Mon Sep 17 00:00:00 2001
From: William P
Date: Sat, 29 Nov 2025 18:45:28 -0500
Subject: [PATCH] velero: add alert rules

---
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch. Confirm that the context-line
indentation matches system-apps/velero/values.yaml before applying.

 system-apps/velero/values.yaml | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/system-apps/velero/values.yaml b/system-apps/velero/values.yaml
index 580e3e0..5ed27a6 100644
--- a/system-apps/velero/values.yaml
+++ b/system-apps/velero/values.yaml
@@ -6,6 +6,49 @@ velero:
       enabled: true
     prometheusRule:
       enabled: true
+      spec:
+      # Warn when a schedule's last backup status has not been 1 for 15m.
+      - alert: VeleroBackupFailed
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has failed
+        expr: |-
+          velero_backup_last_status{schedule!=""} != 1
+        for: 15m
+        labels:
+          severity: warning
+      # Same expression as VeleroBackupFailed; escalates to critical after 12h.
+      - alert: VeleroBackupFailing
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has been failing for the last 12h
+        expr: |-
+          velero_backup_last_status{schedule!=""} != 1
+        for: 12h
+        labels:
+          severity: critical
+      # Last success is older than 25h, or no success timestamp exists at all.
+      # absent() yields no schedule label, so the message shows none in that case.
+      - alert: VeleroNoNewBackup
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has not run successfully in the last 25h
+        expr: |-
+          (
+            (time() - velero_backup_last_successful_timestamp{schedule!=""}) >bool (25 * 3600)
+            or
+            absent(velero_backup_last_successful_timestamp{schedule!=""})
+          ) == 1
+        for: 1h
+        labels:
+          severity: critical
+      # Over half of the attempts inside the 25m rate windows partially failed.
+      - alert: VeleroBackupPartialFailures
+        annotations:
+          message: Velero backup {{ $labels.schedule }} has {{ $value | humanizePercentage }} partially failed backups
+        expr: |-
+          rate(velero_backup_partial_failure_total{schedule!=""}[25m])
+          / rate(velero_backup_attempt_total{schedule!=""}[25m]) > 0.5
+        for: 15m
+        labels:
+          severity: warning
   configuration:
     backupStorageLocation:
     - name: weyma-truenas