velero: add alert rules
This commit is contained in:
@@ -6,6 +6,44 @@ velero:
|
|||||||
enabled: true
|
enabled: true
|
||||||
prometheusRule:
|
prometheusRule:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
spec:
|
||||||
|
# Early warning: fires once velero_backup_last_status of a scheduled backup
# has been different from 1 for 15 minutes.
# NOTE(review): presumably 1 is the "success" value of this gauge -- confirm
# against the Velero metrics documentation.
- alert: VeleroBackupFailed
  annotations:
    message: 'Velero backup {{ $labels.schedule }} has failed'
  # schedule!="" keeps only series that carry a non-empty schedule label.
  expr: |-
    velero_backup_last_status{schedule!=""} != 1
  for: 15m
  labels:
    severity: warning
|
||||||
|
# Escalation of the same "last status is not 1" condition: it has to hold
# for 12 hours without interruption before this critical alert fires.
- alert: VeleroBackupFailing
  annotations:
    message: 'Velero backup {{ $labels.schedule }} has been failing for the last 12h'
  # Same selector as the 15m warning; only the hold time and severity differ.
  expr: |-
    velero_backup_last_status{schedule!=""} != 1
  for: 12h
  labels:
    severity: critical
|
||||||
|
# Staleness check: fires when the last successful backup timestamp of a
# schedule is more than 25 hours (25 * 3600 seconds) old, or when no such
# timestamp series exists at all, and that stays true for 1 hour.
# NOTE(review): when the absent() branch fires, the resulting series likely
# has no schedule label, so {{ $labels.schedule }} may render empty -- confirm.
- alert: VeleroNoNewBackup
  annotations:
    message: 'Velero backup {{ $labels.schedule }} has not run successfully in the last 25h'
  # ">bool" turns the age comparison into 0/1 so it can be combined with the
  # result of absent() via "or"; the trailing "== 1" keeps only the firing
  # series.
  expr: |-
    (
      (time() - velero_backup_last_successful_timestamp{schedule!=""}) >bool (25 * 3600)
      or
      absent(velero_backup_last_successful_timestamp{schedule!=""})
    ) == 1
  for: 1h
  labels:
    severity: critical
|
||||||
|
# Ratio check: fires when, over a 25 minute rate window, partially failed
# backups make up more than half (0.5) of the backup attempts of a schedule,
# and that stays true for 15 minutes.
- alert: VeleroBackupPartialFailures
  annotations:
    # $value is the ratio computed by expr; humanizePercentage renders it as
    # a percentage. Spelling of "partially" corrected in the message text.
    message: 'Velero backup {{ $labels.schedule }} has {{ $value | humanizePercentage }} partially failed backups'
  expr: |-
    rate(velero_backup_partial_failure_total{schedule!=""}[25m])
      / rate(velero_backup_attempt_total{schedule!=""}[25m]) > 0.5
  for: 15m
  labels:
    severity: warning
|
||||||
configuration:
|
configuration:
|
||||||
backupStorageLocation:
|
backupStorageLocation:
|
||||||
- name: weyma-truenas
|
- name: weyma-truenas
|
||||||
|
|||||||
Reference in New Issue
Block a user