Compare commits
32 Commits
c26ea4e139
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
b789b7be21
|
|||
|
3a2cfdb84e
|
|||
|
506c034948
|
|||
|
7cbc80906e
|
|||
|
3fd705520c
|
|||
|
94d65decd1
|
|||
|
e06a1be194
|
|||
|
dc926c31de
|
|||
|
af31507e8c
|
|||
|
c0ca549393
|
|||
|
a113c84c9d
|
|||
|
a7cc46ed8a
|
|||
|
54e6a76aab
|
|||
|
33ef2866e9
|
|||
|
b609e87dd3
|
|||
|
e1ffafc161
|
|||
| 4170dfa26c | |||
|
5fcb92ee8b
|
|||
|
c5acc2416f
|
|||
|
87b667b2ab
|
|||
| d68d2db3bc | |||
| ad68a17eb5 | |||
|
b07c7bf3a0
|
|||
|
78fc45ae6c
|
|||
| 2fa1594e99 | |||
| b211327516 | |||
|
6885ec790c
|
|||
|
664cace62e
|
|||
|
dae06b2c05
|
|||
|
583831273d
|
|||
|
f327b23001
|
|||
|
6f2603d3a0
|
@@ -14,6 +14,12 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"packageRules": [
|
"packageRules": [
|
||||||
|
{
|
||||||
|
"description": "Automerge patch updates",
|
||||||
|
"matchUpdateTypes": ["patch"],
|
||||||
|
"matchCurrentVersion": "!/^0/",
|
||||||
|
"automerge": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"description": "Rook Ceph - auto-update minor and patch versions only",
|
"description": "Rook Ceph - auto-update minor and patch versions only",
|
||||||
"matchDatasources": ["docker"],
|
"matchDatasources": ["docker"],
|
||||||
|
|||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: argo-cd
|
- name: argo-cd
|
||||||
version: 9.2.3
|
version: 9.3.4
|
||||||
repository: https://argoproj.github.io/argo-helm
|
repository: https://argoproj.github.io/argo-helm
|
||||||
@@ -56,18 +56,6 @@ argo-cd:
|
|||||||
Argo CD has not reported any applications data for the past 15 minutes which
|
Argo CD has not reported any applications data for the past 15 minutes which
|
||||||
means that it must be down or not functioning properly. This needs to be
|
means that it must be down or not functioning properly. This needs to be
|
||||||
resolved for this cloud to continue to maintain state.
|
resolved for this cloud to continue to maintain state.
|
||||||
- alert: ArgoAppNotSynced
|
|
||||||
expr: |
|
|
||||||
argocd_app_info{sync_status!="Synced"} == 1
|
|
||||||
for: 12h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: '{{ $labels.name }} Application not synchronized'
|
|
||||||
description: >
|
|
||||||
The application {{ $labels.name }} has not been synchronized for over
|
|
||||||
12 hours which means that the state of this cloud has drifted away from the
|
|
||||||
state inside Git.
|
|
||||||
server:
|
server:
|
||||||
ingress:
|
ingress:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
|||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: external-secrets
|
- name: external-secrets
|
||||||
version: 1.2.0
|
version: 1.2.1
|
||||||
repository: https://charts.external-secrets.io
|
repository: https://charts.external-secrets.io
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: kite
|
- name: kite
|
||||||
version: 0.7.6
|
version: 0.7.7
|
||||||
repository: https://zxh326.github.io/kite
|
repository: https://zxh326.github.io/kite
|
||||||
@@ -16,3 +16,5 @@ kite:
|
|||||||
paths:
|
paths:
|
||||||
- path: /
|
- path: /
|
||||||
pathType: ImplementationSpecific
|
pathType: ImplementationSpecific
|
||||||
|
podAnnotations:
|
||||||
|
backup.velero.io/backup-volumes: kite-storage
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: kube-prometheus-stack
|
- name: kube-prometheus-stack
|
||||||
version: 80.9.1
|
version: 80.14.4
|
||||||
repository: https://prometheus-community.github.io/helm-charts
|
repository: https://prometheus-community.github.io/helm-charts
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: rook-ceph
|
- name: rook-ceph
|
||||||
version: v1.18.8
|
version: v1.18.9
|
||||||
repository: https://charts.rook.io/release
|
repository: https://charts.rook.io/release
|
||||||
@@ -497,61 +497,6 @@ spec:
|
|||||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
|
oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
|
||||||
severity: "critical"
|
severity: "critical"
|
||||||
type: "ceph_default"
|
type: "ceph_default"
|
||||||
- alert: "CephNodeNetworkPacketDrops"
|
|
||||||
annotations:
|
|
||||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
|
|
||||||
summary: "One or more NICs reports packet drops"
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
|
|
||||||
rate(node_network_transmit_drop_total{device!="lo"}[1m])
|
|
||||||
) / (
|
|
||||||
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
|
|
||||||
rate(node_network_transmit_packets_total{device!="lo"}[1m])
|
|
||||||
) >= 0.0050000000000000001 and (
|
|
||||||
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
|
|
||||||
rate(node_network_transmit_drop_total{device!="lo"}[1m])
|
|
||||||
) >= 10
|
|
||||||
labels:
|
|
||||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.2"
|
|
||||||
severity: "warning"
|
|
||||||
type: "ceph_default"
|
|
||||||
- alert: "CephNodeNetworkPacketErrors"
|
|
||||||
annotations:
|
|
||||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
|
|
||||||
summary: "One or more NICs reports packet errors"
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
|
|
||||||
rate(node_network_transmit_errs_total{device!="lo"}[1m])
|
|
||||||
) / (
|
|
||||||
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
|
|
||||||
rate(node_network_transmit_packets_total{device!="lo"}[1m])
|
|
||||||
) >= 0.0001 or (
|
|
||||||
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
|
|
||||||
rate(node_network_transmit_errs_total{device!="lo"}[1m])
|
|
||||||
) >= 10
|
|
||||||
labels:
|
|
||||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.3"
|
|
||||||
severity: "warning"
|
|
||||||
type: "ceph_default"
|
|
||||||
- alert: "CephNodeNetworkBondDegraded"
|
|
||||||
annotations:
|
|
||||||
description: "Bond {{ "{{" }} $labels.master {{ "}}" }} is degraded on Node {{ "{{" }} $labels.instance {{ "}}" }}."
|
|
||||||
summary: "Degraded Bond on Node {{ "{{" }} $labels.instance {{ "}}" }}"
|
|
||||||
expr: |
|
|
||||||
node_bonding_slaves - node_bonding_active != 0
|
|
||||||
labels:
|
|
||||||
severity: "warning"
|
|
||||||
type: "ceph_default"
|
|
||||||
- alert: "CephNodeInconsistentMTU"
|
|
||||||
annotations:
|
|
||||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}."
|
|
||||||
summary: "MTU settings across Ceph hosts are inconsistent"
|
|
||||||
expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )"
|
|
||||||
labels:
|
|
||||||
severity: "warning"
|
|
||||||
type: "ceph_default"
|
|
||||||
- name: "pools"
|
- name: "pools"
|
||||||
rules:
|
rules:
|
||||||
- alert: "CephPoolGrowthWarning"
|
- alert: "CephPoolGrowthWarning"
|
||||||
|
|||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: traefik
|
- name: traefik
|
||||||
version: 38.0.1
|
version: 38.0.2
|
||||||
repository: https://traefik.github.io/charts
|
repository: https://traefik.github.io/charts
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: velero
|
- name: velero
|
||||||
version: 11.3.1
|
version: 11.3.2
|
||||||
repository: https://vmware-tanzu.github.io/helm-charts
|
repository: https://vmware-tanzu.github.io/helm-charts
|
||||||
Reference in New Issue
Block a user