Compare commits

...

42 Commits

Author SHA1 Message Date
b789b7be21 chore(deps): update helm release kube-prometheus-stack to v80.14.4 2026-01-15 15:00:53 +00:00
3a2cfdb84e chore(deps): update helm release argo-cd to v9.3.4 2026-01-14 22:00:23 +00:00
506c034948 argocd: remove unnecessary and annoying prometheus alert 2026-01-14 13:27:50 -05:00
7cbc80906e chore(deps): update helm release kube-prometheus-stack to v80.14.3 2026-01-14 15:00:53 +00:00
3fd705520c chore(deps): update helm release kube-prometheus-stack to v80.14.2 2026-01-14 04:00:53 +00:00
94d65decd1 chore(deps): update helm release argo-cd to v9.3.3 2026-01-13 23:00:55 +00:00
e06a1be194 chore(deps): update helm release kube-prometheus-stack to v80.14.1 2026-01-13 21:01:40 +00:00
dc926c31de chore(deps): update helm release rook-ceph to v1.18.9 2026-01-13 20:00:23 +00:00
af31507e8c Merge branch 'renovate/argo-cd-9.x' 2026-01-13 11:28:24 -05:00
c0ca549393 Merge branch 'renovate/kube-prometheus-stack-80.x' 2026-01-13 11:26:09 -05:00
a113c84c9d chore(deps): update helm release kube-prometheus-stack to v80.14.0 2026-01-13 16:00:44 +00:00
a7cc46ed8a chore(deps): update helm release argo-cd to v9.3.1 2026-01-13 11:00:29 +00:00
54e6a76aab chore(deps): update helm release kube-prometheus-stack to v80.13.3 2026-01-09 10:00:58 +00:00
33ef2866e9 chore(deps): update helm release traefik to v38.0.2 2026-01-08 09:00:26 +00:00
b609e87dd3 chore(deps): update helm release kube-prometheus-stack to v80.13.2 2026-01-07 23:00:59 +00:00
e1ffafc161 chore(deps): update helm release kube-prometheus-stack to v80.13.1 2026-01-07 18:00:29 +00:00
4170dfa26c Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.13.0' (#180) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #180
2026-01-06 22:56:17 +00:00
5fcb92ee8b chore(deps): update helm release kube-prometheus-stack to v80.13.0 2026-01-06 22:00:55 +00:00
c5acc2416f chore(deps): update helm release velero to v11.3.2 2026-01-06 12:00:56 +00:00
87b667b2ab chore(deps): update helm release kube-prometheus-stack to v80.11.1 2026-01-06 11:00:55 +00:00
d68d2db3bc Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.11.0' (#177) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #177
2026-01-06 01:46:43 +00:00
ad68a17eb5 Merge pull request 'chore(deps): update helm release kite to v0.7.7' (#176) from renovate/kite-0.x into main
Reviewed-on: #176
2026-01-06 01:46:28 +00:00
b07c7bf3a0 chore(deps): update helm release kube-prometheus-stack to v80.11.0 2026-01-05 15:00:58 +00:00
78fc45ae6c chore(deps): update helm release kite to v0.7.7 2026-01-05 13:00:27 +00:00
2fa1594e99 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.10.0' (#173) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #173
2026-01-03 16:53:26 +00:00
b211327516 chore(deps): update helm release external-secrets to v1.2.1 2026-01-02 23:00:55 +00:00
6885ec790c chore(deps): update helm release argo-cd to v9.2.4 2026-01-02 23:00:27 +00:00
664cace62e chore(deps): update helm release kube-prometheus-stack to v80.10.0 2026-01-02 18:00:26 +00:00
dae06b2c05 chore(deps): update helm release kube-prometheus-stack to v80.9.2 2025-12-31 11:00:57 +00:00
583831273d kite: create volume backups 2025-12-31 00:15:20 -05:00
f327b23001 try automerging patches 2025-12-31 00:01:26 -05:00
6f2603d3a0 remove redundant node alerts 2025-12-30 23:50:18 -05:00
c26ea4e139 Merge pull request 'chore(deps): update helm release velero to v11.3.1' (#170) from renovate/velero-11.x into main
Reviewed-on: #170
2025-12-31 04:44:22 +00:00
b521924f00 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.9.1' (#171) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #171
2025-12-31 04:44:07 +00:00
19f203e374 re-add weyma-talos-cp04 2025-12-30 23:43:11 -05:00
bb251462fb update omni cluster template 2025-12-30 22:44:13 -05:00
9a9d108e7c chore(deps): update helm release kube-prometheus-stack to v80.9.1 2025-12-30 19:00:56 +00:00
70d5ae2e48 chore(deps): update helm release velero to v11.3.1 2025-12-29 11:00:24 +00:00
e6e25baee1 Merge pull request 'chore(deps): update helm release argo-cd to v9.2.3' (#168) from renovate/argo-cd-9.x into main
Reviewed-on: #168
2025-12-29 00:55:53 +00:00
a08e9930d5 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.8.0' (#169) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #169
2025-12-29 00:55:41 +00:00
94bb98b4ed chore(deps): update helm release argo-cd to v9.2.3 2025-12-28 13:00:26 +00:00
07f863b0a7 chore(deps): update helm release kube-prometheus-stack to v80.8.0 2025-12-28 01:08:47 +00:00
12 changed files with 45 additions and 97 deletions

View File

@@ -291,8 +291,8 @@ patches:
kind: ControlPlane
machines:
- 20b4c826-e699-43b3-826d-73eb5173680b
- 30303031-3030-3030-6335-303731636665
- 5fdea709-56ad-45f2-966d-5e344dbe4fdf
- 30303031-3030-3030-6335-303731636665
---
kind: Workers
machines:
@@ -306,6 +306,8 @@ kind: Machine
name: 02c02200-f403-11ef-9372-70f446672600
patches:
- idOverride: 400-cm-02c02200-f403-11ef-9372-70f446672600
annotations:
name: ""
inline:
machine:
network:
@@ -329,14 +331,11 @@ patches:
interface: br0
---
kind: Machine
systemExtensions:
- siderolabs/i915
- siderolabs/nut-client
name: 03000200-0400-0500-0006-000700080009
install:
disk: /dev/sda
patches:
- idOverride: 400-cm-03000200-0400-0500-0006-000700080009
annotations:
name: ""
inline:
machine:
network:
@@ -363,6 +362,8 @@ kind: Machine
name: 1006b91a-ecbf-11ea-aed4-046ba1ee3700
patches:
- idOverride: 400-cm-1006b91a-ecbf-11ea-aed4-046ba1ee3700
annotations:
name: ""
inline:
machine:
network:
@@ -386,22 +387,6 @@ patches:
interface: br0
---
kind: Machine
name: 20b4c826-e699-43b3-826d-73eb5173680b
patches:
- idOverride: 400-cm-20b4c826-e699-43b3-826d-73eb5173680b
inline:
machine:
network:
hostname: weyma-talos-cp02
interfaces:
- deviceSelector:
driver: virtio*
hardwareAddr: 00:16:3e:9c:01:27
dhcp: true
---
kind: Machine
systemExtensions:
- siderolabs/nut-client
name: 30303031-3030-3030-6335-303731636665
patches:
- idOverride: 400-cm-30303031-3030-3030-6335-303731636665
@@ -415,9 +400,27 @@ patches:
dhcp: true
---
kind: Machine
name: 20b4c826-e699-43b3-826d-73eb5173680b
patches:
- idOverride: 400-cm-20b4c826-e699-43b3-826d-73eb5173680b
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-cp02
interfaces:
- deviceSelector:
driver: virtio*
hardwareAddr: 00:16:3e:9c:01:27
dhcp: true
---
kind: Machine
name: 5f0cd701-0784-4fcc-8e52-3b3304049972
patches:
- idOverride: 400-cm-5f0cd701-0784-4fcc-8e52-3b3304049972
annotations:
name: ""
inline:
machine:
network:
@@ -446,6 +449,8 @@ systemExtensions:
name: 5fdea709-56ad-45f2-966d-5e344dbe4fdf
patches:
- idOverride: 400-cm-5fdea709-56ad-45f2-966d-5e344dbe4fdf
annotations:
name: ""
inline:
machine:
network:
@@ -460,6 +465,8 @@ kind: Machine
name: da507021-8912-4337-86a3-94a05bd1cf05
patches:
- idOverride: 400-cm-da507021-8912-4337-86a3-94a05bd1cf05
annotations:
name: ""
inline:
machine:
network:

View File

@@ -14,6 +14,12 @@
}
],
"packageRules": [
{
"description": "Automerge patch updates",
"matchUpdateTypes": ["patch"],
"matchCurrentVersion": "!/^0/",
"automerge": true
},
{
"description": "Rook Ceph - auto-update minor and patch versions only",
"matchDatasources": ["docker"],

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: argo-cd
version: 9.2.0
version: 9.3.4
repository: https://argoproj.github.io/argo-helm

View File

@@ -56,18 +56,6 @@ argo-cd:
Argo CD has not reported any applications data for the past 15 minutes which
means that it must be down or not functioning properly. This needs to be
resolved for this cloud to continue to maintain state.
- alert: ArgoAppNotSynced
expr: |
argocd_app_info{sync_status!="Synced"} == 1
for: 12h
labels:
severity: warning
annotations:
summary: '{{ $labels.name }} Application not synchronized'
description: >
The application {{ $labels.name }} has not been synchronized for over
12 hours which means that the state of this cloud has drifted away from the
state inside Git.
server:
ingress:
enabled: true

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: external-secrets
version: 1.2.0
version: 1.2.1
repository: https://charts.external-secrets.io

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: kite
version: 0.7.6
version: 0.7.7
repository: https://zxh326.github.io/kite

View File

@@ -15,4 +15,6 @@ kite:
- host: weyma-kite.infra.dubyatp.xyz
paths:
- path: /
pathType: ImplementationSpecific
pathType: ImplementationSpecific
podAnnotations:
backup.velero.io/backup-volumes: kite-storage

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: kube-prometheus-stack
version: 80.6.0
version: 80.14.4
repository: https://prometheus-community.github.io/helm-charts

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: rook-ceph
version: v1.18.8
version: v1.18.9
repository: https://charts.rook.io/release

View File

@@ -497,61 +497,6 @@ spec:
oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
severity: "critical"
type: "ceph_default"
- alert: "CephNodeNetworkPacketDrops"
annotations:
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
summary: "One or more NICs reports packet drops"
expr: |
(
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
rate(node_network_transmit_drop_total{device!="lo"}[1m])
) / (
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
rate(node_network_transmit_packets_total{device!="lo"}[1m])
) >= 0.0050000000000000001 and (
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
rate(node_network_transmit_drop_total{device!="lo"}[1m])
) >= 10
labels:
oid: "1.3.6.1.4.1.50495.1.2.1.8.2"
severity: "warning"
type: "ceph_default"
- alert: "CephNodeNetworkPacketErrors"
annotations:
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
summary: "One or more NICs reports packet errors"
expr: |
(
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
rate(node_network_transmit_errs_total{device!="lo"}[1m])
) / (
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
rate(node_network_transmit_packets_total{device!="lo"}[1m])
) >= 0.0001 or (
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
rate(node_network_transmit_errs_total{device!="lo"}[1m])
) >= 10
labels:
oid: "1.3.6.1.4.1.50495.1.2.1.8.3"
severity: "warning"
type: "ceph_default"
- alert: "CephNodeNetworkBondDegraded"
annotations:
description: "Bond {{ "{{" }} $labels.master {{ "}}" }} is degraded on Node {{ "{{" }} $labels.instance {{ "}}" }}."
summary: "Degraded Bond on Node {{ "{{" }} $labels.instance {{ "}}" }}"
expr: |
node_bonding_slaves - node_bonding_active != 0
labels:
severity: "warning"
type: "ceph_default"
- alert: "CephNodeInconsistentMTU"
annotations:
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}."
summary: "MTU settings across Ceph hosts are inconsistent"
expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )"
labels:
severity: "warning"
type: "ceph_default"
- name: "pools"
rules:
- alert: "CephPoolGrowthWarning"

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: traefik
version: 38.0.1
version: 38.0.2
repository: https://traefik.github.io/charts

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: velero
version: 11.2.0
version: 11.3.2
repository: https://vmware-tanzu.github.io/helm-charts