Compare commits
36 Commits
19f203e374
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
b789b7be21
|
|||
|
3a2cfdb84e
|
|||
|
506c034948
|
|||
|
7cbc80906e
|
|||
|
3fd705520c
|
|||
|
94d65decd1
|
|||
|
e06a1be194
|
|||
|
dc926c31de
|
|||
|
af31507e8c
|
|||
|
c0ca549393
|
|||
|
a113c84c9d
|
|||
|
a7cc46ed8a
|
|||
|
54e6a76aab
|
|||
|
33ef2866e9
|
|||
|
b609e87dd3
|
|||
|
e1ffafc161
|
|||
| 4170dfa26c | |||
|
5fcb92ee8b
|
|||
|
c5acc2416f
|
|||
|
87b667b2ab
|
|||
| d68d2db3bc | |||
| ad68a17eb5 | |||
|
b07c7bf3a0
|
|||
|
78fc45ae6c
|
|||
| 2fa1594e99 | |||
| b211327516 | |||
|
6885ec790c
|
|||
|
664cace62e
|
|||
|
dae06b2c05
|
|||
|
583831273d
|
|||
|
f327b23001
|
|||
|
6f2603d3a0
|
|||
| c26ea4e139 | |||
| b521924f00 | |||
|
9a9d108e7c
|
|||
|
70d5ae2e48
|
@@ -14,6 +14,12 @@
|
||||
}
|
||||
],
|
||||
"packageRules": [
|
||||
{
|
||||
"description": "Automerge patch updates",
|
||||
"matchUpdateTypes": ["patch"],
|
||||
"matchCurrentVersion": "!/^0/",
|
||||
"automerge": true
|
||||
},
|
||||
{
|
||||
"description": "Rook Ceph - auto-update minor and patch versions only",
|
||||
"matchDatasources": ["docker"],
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: argo-cd
|
||||
version: 9.2.3
|
||||
version: 9.3.4
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
@@ -56,18 +56,6 @@ argo-cd:
|
||||
Argo CD has not reported any applications data for the past 15 minutes which
|
||||
means that it must be down or not functioning properly. This needs to be
|
||||
resolved for this cloud to continue to maintain state.
|
||||
- alert: ArgoAppNotSynced
|
||||
expr: |
|
||||
argocd_app_info{sync_status!="Synced"} == 1
|
||||
for: 12h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: '{{ $labels.name }} Application not synchronized'
|
||||
description: >
|
||||
The application {{ $labels.name }} has not been synchronized for over
|
||||
12 hours which means that the state of this cloud has drifted away from the
|
||||
state inside Git.
|
||||
server:
|
||||
ingress:
|
||||
enabled: true
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: external-secrets
|
||||
version: 1.2.0
|
||||
version: 1.2.1
|
||||
repository: https://charts.external-secrets.io
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: kite
|
||||
version: 0.7.6
|
||||
version: 0.7.7
|
||||
repository: https://zxh326.github.io/kite
|
||||
@@ -15,4 +15,6 @@ kite:
|
||||
- host: weyma-kite.infra.dubyatp.xyz
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
pathType: ImplementationSpecific
|
||||
podAnnotations:
|
||||
backup.velero.io/backup-volumes: kite-storage
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: kube-prometheus-stack
|
||||
version: 80.8.0
|
||||
version: 80.14.4
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: rook-ceph
|
||||
version: v1.18.8
|
||||
version: v1.18.9
|
||||
repository: https://charts.rook.io/release
|
||||
@@ -497,61 +497,6 @@ spec:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
|
||||
severity: "critical"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeNetworkPacketDrops"
|
||||
annotations:
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
|
||||
summary: "One or more NICs reports packet drops"
|
||||
expr: |
|
||||
(
|
||||
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_drop_total{device!="lo"}[1m])
|
||||
) / (
|
||||
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_packets_total{device!="lo"}[1m])
|
||||
) >= 0.0050000000000000001 and (
|
||||
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_drop_total{device!="lo"}[1m])
|
||||
) >= 10
|
||||
labels:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.2"
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeNetworkPacketErrors"
|
||||
annotations:
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
|
||||
summary: "One or more NICs reports packet errors"
|
||||
expr: |
|
||||
(
|
||||
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_errs_total{device!="lo"}[1m])
|
||||
) / (
|
||||
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_packets_total{device!="lo"}[1m])
|
||||
) >= 0.0001 or (
|
||||
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_errs_total{device!="lo"}[1m])
|
||||
) >= 10
|
||||
labels:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.3"
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeNetworkBondDegraded"
|
||||
annotations:
|
||||
description: "Bond {{ "{{" }} $labels.master {{ "}}" }} is degraded on Node {{ "{{" }} $labels.instance {{ "}}" }}."
|
||||
summary: "Degraded Bond on Node {{ "{{" }} $labels.instance {{ "}}" }}"
|
||||
expr: |
|
||||
node_bonding_slaves - node_bonding_active != 0
|
||||
labels:
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeInconsistentMTU"
|
||||
annotations:
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}."
|
||||
summary: "MTU settings across Ceph hosts are inconsistent"
|
||||
expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )"
|
||||
labels:
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- name: "pools"
|
||||
rules:
|
||||
- alert: "CephPoolGrowthWarning"
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: traefik
|
||||
version: 38.0.1
|
||||
version: 38.0.2
|
||||
repository: https://traefik.github.io/charts
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: velero
|
||||
version: 11.2.0
|
||||
version: 11.3.2
|
||||
repository: https://vmware-tanzu.github.io/helm-charts
|
||||
Reference in New Issue
Block a user