velero: add alert rules

This commit is contained in:
2025-11-29 18:45:28 -05:00
parent c2d6c0c8bb
commit 7136a0f322

View File

@@ -6,6 +6,44 @@ velero:
enabled: true
prometheusRule:
enabled: true
spec:
# Early warning: the last reported status of a scheduled backup has been
# something other than 1 for 15 minutes.
# NOTE(review): presumably 1 means "success" for this gauge; confirm against
# the Velero metrics documentation.
- alert: VeleroBackupFailed
  expr: 'velero_backup_last_status{schedule!=""} != 1'
  for: 15m
  labels:
    severity: warning
  annotations:
    message: 'Velero backup {{ $labels.schedule }} has failed'
# Escalation: same condition as VeleroBackupFailed (last status != 1), but it
# must hold for 12 hours and is raised at critical severity.
# NOTE(review): presumably 1 means "success" for this gauge; confirm against
# the Velero metrics documentation.
- alert: VeleroBackupFailing
  expr: 'velero_backup_last_status{schedule!=""} != 1'
  for: 12h
  labels:
    severity: critical
  annotations:
    message: 'Velero backup {{ $labels.schedule }} has been failing for the last 12h'
# Critical: a schedule's last successful backup is more than 25 hours old, or
# no last-success timestamp series exists at all, continuously for one hour.
#
# The comparison is a plain filtering `>` rather than `>bool ... == 1`: the set
# of firing series is the same, but $value now carries the backup age in
# seconds for stale schedules instead of a constant 1.
#
# NOTE(review): the absent() branch yields a series without a `schedule` label
# (the selector only has a negative matcher), so the message renders with an
# empty schedule name when it fires through that branch.
- alert: VeleroNoNewBackup
  annotations:
    message: 'Velero backup {{ $labels.schedule }} has not run successfully in the last 25h'
  expr: |-
    (time() - velero_backup_last_successful_timestamp{schedule!=""}) > (25 * 3600)
    or
    absent(velero_backup_last_successful_timestamp{schedule!=""})
  for: 1h
  labels:
    severity: critical
# Warning: over a 25-minute window, more than half of a schedule's backup
# attempts ended as partial failures, and that has held for 15 minutes.
# $value is the partial-failure ratio (0..1), rendered as a percentage in the
# message via humanizePercentage.
- alert: VeleroBackupPartialFailures
  annotations:
    message: 'Velero backup {{ $labels.schedule }} has {{ $value | humanizePercentage }} partially failed backups'
  expr: |-
    rate(velero_backup_partial_failure_total{schedule!=""}[25m])
      / rate(velero_backup_attempt_total{schedule!=""}[25m]) > 0.5
  for: 15m
  labels:
    severity: warning
configuration:
backupStorageLocation:
- name: weyma-truenas