velero: add alert rules
This commit is contained in:
@@ -6,6 +6,44 @@ velero:
|
||||
enabled: true
|
||||
prometheusRule:
|
||||
enabled: true
|
||||
spec:
|
||||
# Warning: raised once velero_backup_last_status, for any series carrying a
# non-empty `schedule` label, has been different from 1 for 15 minutes.
- alert: VeleroBackupFailed
  expr: 'velero_backup_last_status{schedule!=""} != 1'
  for: 15m
  labels:
    severity: warning
  annotations:
    # {{ ... }} is expanded by Prometheus at alert time, not by YAML.
    message: 'Velero backup {{ $labels.schedule }} has failed'
|
||||
# Critical: same condition as the VeleroBackupFailed warning
# (velero_backup_last_status != 1 for a scheduled backup), but it must hold
# for 12 hours before the alert fires.
- alert: VeleroBackupFailing
  expr: 'velero_backup_last_status{schedule!=""} != 1'
  for: 12h
  labels:
    severity: critical
  annotations:
    # {{ ... }} is expanded by Prometheus at alert time, not by YAML.
    message: 'Velero backup {{ $labels.schedule }} has been failing for the last 12h'
|
||||
# Critical: no successful scheduled backup within the last 25 hours, or the
# success-timestamp metric is not being reported at all.
- alert: VeleroNoNewBackup
  # `>bool` turns the age comparison into 0/1 per series; `or absent(...)`
  # contributes a 1 when the selector matches nothing; `== 1` keeps only the
  # offending results.
  # NOTE(review): the absent() branch presumably yields a series without a
  # `schedule` label, so the message would render an empty name - confirm.
  expr: |-
    (
      (time() - velero_backup_last_successful_timestamp{schedule!=""}) >bool (25 * 3600)
      or
      absent(velero_backup_last_successful_timestamp{schedule!=""})
    ) == 1
  for: 1h
  labels:
    severity: critical
  annotations:
    # {{ ... }} is expanded by Prometheus at alert time, not by YAML.
    message: 'Velero backup {{ $labels.schedule }} has not run successfully in the last 25h'
|
||||
# Warning: more than half of the recent backup attempts for a schedule ended
# as partial failures, sustained for 15 minutes.
- alert: VeleroBackupPartialFailures
  # Ratio of the partial-failure rate to the attempt rate, both taken over a
  # 25-minute window and restricted to series with a non-empty `schedule`.
  expr: |-
    rate(velero_backup_partial_failure_total{schedule!=""}[25m])
      / rate(velero_backup_attempt_total{schedule!=""}[25m]) > 0.5
  for: 15m
  labels:
    severity: warning
  annotations:
    # $value is the ratio computed above; humanizePercentage renders it as a
    # percentage. Spelling corrected from "partialy" to "partially".
    message: 'Velero backup {{ $labels.schedule }} has {{ $value | humanizePercentage }} partially failed backups'
|
||||
configuration:
|
||||
backupStorageLocation:
|
||||
- name: weyma-truenas
|
||||
|
||||
Reference in New Issue
Block a user