Compare commits
40 Commits
a08e9930d5
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
b789b7be21
|
|||
|
3a2cfdb84e
|
|||
|
506c034948
|
|||
|
7cbc80906e
|
|||
|
3fd705520c
|
|||
|
94d65decd1
|
|||
|
e06a1be194
|
|||
|
dc926c31de
|
|||
|
af31507e8c
|
|||
|
c0ca549393
|
|||
|
a113c84c9d
|
|||
|
a7cc46ed8a
|
|||
|
54e6a76aab
|
|||
|
33ef2866e9
|
|||
|
b609e87dd3
|
|||
|
e1ffafc161
|
|||
| 4170dfa26c | |||
|
5fcb92ee8b
|
|||
|
c5acc2416f
|
|||
|
87b667b2ab
|
|||
| d68d2db3bc | |||
| ad68a17eb5 | |||
|
b07c7bf3a0
|
|||
|
78fc45ae6c
|
|||
| 2fa1594e99 | |||
| b211327516 | |||
|
6885ec790c
|
|||
|
664cace62e
|
|||
|
dae06b2c05
|
|||
|
583831273d
|
|||
|
f327b23001
|
|||
|
6f2603d3a0
|
|||
| c26ea4e139 | |||
| b521924f00 | |||
|
19f203e374
|
|||
|
bb251462fb
|
|||
|
9a9d108e7c
|
|||
|
70d5ae2e48
|
|||
| e6e25baee1 | |||
|
94bb98b4ed
|
@@ -291,8 +291,8 @@ patches:
|
||||
kind: ControlPlane
|
||||
machines:
|
||||
- 20b4c826-e699-43b3-826d-73eb5173680b
|
||||
- 30303031-3030-3030-6335-303731636665
|
||||
- 5fdea709-56ad-45f2-966d-5e344dbe4fdf
|
||||
- 30303031-3030-3030-6335-303731636665
|
||||
---
|
||||
kind: Workers
|
||||
machines:
|
||||
@@ -306,6 +306,8 @@ kind: Machine
|
||||
name: 02c02200-f403-11ef-9372-70f446672600
|
||||
patches:
|
||||
- idOverride: 400-cm-02c02200-f403-11ef-9372-70f446672600
|
||||
annotations:
|
||||
name: ""
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
@@ -329,14 +331,11 @@ patches:
|
||||
interface: br0
|
||||
---
|
||||
kind: Machine
|
||||
systemExtensions:
|
||||
- siderolabs/i915
|
||||
- siderolabs/nut-client
|
||||
name: 03000200-0400-0500-0006-000700080009
|
||||
install:
|
||||
disk: /dev/sda
|
||||
patches:
|
||||
- idOverride: 400-cm-03000200-0400-0500-0006-000700080009
|
||||
annotations:
|
||||
name: ""
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
@@ -363,6 +362,8 @@ kind: Machine
|
||||
name: 1006b91a-ecbf-11ea-aed4-046ba1ee3700
|
||||
patches:
|
||||
- idOverride: 400-cm-1006b91a-ecbf-11ea-aed4-046ba1ee3700
|
||||
annotations:
|
||||
name: ""
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
@@ -386,22 +387,6 @@ patches:
|
||||
interface: br0
|
||||
---
|
||||
kind: Machine
|
||||
name: 20b4c826-e699-43b3-826d-73eb5173680b
|
||||
patches:
|
||||
- idOverride: 400-cm-20b4c826-e699-43b3-826d-73eb5173680b
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
hostname: weyma-talos-cp02
|
||||
interfaces:
|
||||
- deviceSelector:
|
||||
driver: virtio*
|
||||
hardwareAddr: 00:16:3e:9c:01:27
|
||||
dhcp: true
|
||||
---
|
||||
kind: Machine
|
||||
systemExtensions:
|
||||
- siderolabs/nut-client
|
||||
name: 30303031-3030-3030-6335-303731636665
|
||||
patches:
|
||||
- idOverride: 400-cm-30303031-3030-3030-6335-303731636665
|
||||
@@ -415,9 +400,27 @@ patches:
|
||||
dhcp: true
|
||||
---
|
||||
kind: Machine
|
||||
name: 20b4c826-e699-43b3-826d-73eb5173680b
|
||||
patches:
|
||||
- idOverride: 400-cm-20b4c826-e699-43b3-826d-73eb5173680b
|
||||
annotations:
|
||||
name: ""
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
hostname: weyma-talos-cp02
|
||||
interfaces:
|
||||
- deviceSelector:
|
||||
driver: virtio*
|
||||
hardwareAddr: 00:16:3e:9c:01:27
|
||||
dhcp: true
|
||||
---
|
||||
kind: Machine
|
||||
name: 5f0cd701-0784-4fcc-8e52-3b3304049972
|
||||
patches:
|
||||
- idOverride: 400-cm-5f0cd701-0784-4fcc-8e52-3b3304049972
|
||||
annotations:
|
||||
name: ""
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
@@ -446,6 +449,8 @@ systemExtensions:
|
||||
name: 5fdea709-56ad-45f2-966d-5e344dbe4fdf
|
||||
patches:
|
||||
- idOverride: 400-cm-5fdea709-56ad-45f2-966d-5e344dbe4fdf
|
||||
annotations:
|
||||
name: ""
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
@@ -460,6 +465,8 @@ kind: Machine
|
||||
name: da507021-8912-4337-86a3-94a05bd1cf05
|
||||
patches:
|
||||
- idOverride: 400-cm-da507021-8912-4337-86a3-94a05bd1cf05
|
||||
annotations:
|
||||
name: ""
|
||||
inline:
|
||||
machine:
|
||||
network:
|
||||
|
||||
@@ -14,6 +14,12 @@
|
||||
}
|
||||
],
|
||||
"packageRules": [
|
||||
{
|
||||
"description": "Automerge patch updates",
|
||||
"matchUpdateTypes": ["patch"],
|
||||
"matchCurrentVersion": "!/^0/",
|
||||
"automerge": true
|
||||
},
|
||||
{
|
||||
"description": "Rook Ceph - auto-update minor and patch versions only",
|
||||
"matchDatasources": ["docker"],
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: argo-cd
|
||||
version: 9.2.0
|
||||
version: 9.3.4
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
@@ -56,18 +56,6 @@ argo-cd:
|
||||
Argo CD has not reported any applications data for the past 15 minutes which
|
||||
means that it must be down or not functioning properly. This needs to be
|
||||
resolved for this cloud to continue to maintain state.
|
||||
- alert: ArgoAppNotSynced
|
||||
expr: |
|
||||
argocd_app_info{sync_status!="Synced"} == 1
|
||||
for: 12h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: '{{ $labels.name }} Application not synchronized'
|
||||
description: >
|
||||
The application {{ $labels.name }} has not been synchronized for over
|
||||
12 hours which means that the state of this cloud has drifted away from the
|
||||
state inside Git.
|
||||
server:
|
||||
ingress:
|
||||
enabled: true
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: external-secrets
|
||||
version: 1.2.0
|
||||
version: 1.2.1
|
||||
repository: https://charts.external-secrets.io
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: kite
|
||||
version: 0.7.6
|
||||
version: 0.7.7
|
||||
repository: https://zxh326.github.io/kite
|
||||
@@ -15,4 +15,6 @@ kite:
|
||||
- host: weyma-kite.infra.dubyatp.xyz
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
pathType: ImplementationSpecific
|
||||
podAnnotations:
|
||||
backup.velero.io/backup-volumes: kite-storage
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: kube-prometheus-stack
|
||||
version: 80.8.0
|
||||
version: 80.14.4
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: rook-ceph
|
||||
version: v1.18.8
|
||||
version: v1.18.9
|
||||
repository: https://charts.rook.io/release
|
||||
@@ -497,61 +497,6 @@ spec:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
|
||||
severity: "critical"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeNetworkPacketDrops"
|
||||
annotations:
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
|
||||
summary: "One or more NICs reports packet drops"
|
||||
expr: |
|
||||
(
|
||||
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_drop_total{device!="lo"}[1m])
|
||||
) / (
|
||||
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_packets_total{device!="lo"}[1m])
|
||||
) >= 0.0050000000000000001 and (
|
||||
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_drop_total{device!="lo"}[1m])
|
||||
) >= 10
|
||||
labels:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.2"
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeNetworkPacketErrors"
|
||||
annotations:
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
|
||||
summary: "One or more NICs reports packet errors"
|
||||
expr: |
|
||||
(
|
||||
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_errs_total{device!="lo"}[1m])
|
||||
) / (
|
||||
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_packets_total{device!="lo"}[1m])
|
||||
) >= 0.0001 or (
|
||||
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
|
||||
rate(node_network_transmit_errs_total{device!="lo"}[1m])
|
||||
) >= 10
|
||||
labels:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.3"
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeNetworkBondDegraded"
|
||||
annotations:
|
||||
description: "Bond {{ "{{" }} $labels.master {{ "}}" }} is degraded on Node {{ "{{" }} $labels.instance {{ "}}" }}."
|
||||
summary: "Degraded Bond on Node {{ "{{" }} $labels.instance {{ "}}" }}"
|
||||
expr: |
|
||||
node_bonding_slaves - node_bonding_active != 0
|
||||
labels:
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeInconsistentMTU"
|
||||
annotations:
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}."
|
||||
summary: "MTU settings across Ceph hosts are inconsistent"
|
||||
expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )"
|
||||
labels:
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- name: "pools"
|
||||
rules:
|
||||
- alert: "CephPoolGrowthWarning"
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: traefik
|
||||
version: 38.0.1
|
||||
version: 38.0.2
|
||||
repository: https://traefik.github.io/charts
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: velero
|
||||
version: 11.2.0
|
||||
version: 11.3.2
|
||||
repository: https://vmware-tanzu.github.io/helm-charts
|
||||
Reference in New Issue
Block a user