Compare commits

...

71 Commits

Author SHA1 Message Date
b789b7be21 chore(deps): update helm release kube-prometheus-stack to v80.14.4 2026-01-15 15:00:53 +00:00
3a2cfdb84e chore(deps): update helm release argo-cd to v9.3.4 2026-01-14 22:00:23 +00:00
506c034948 argocd: remove unnecessary and annoying prometheus alert 2026-01-14 13:27:50 -05:00
7cbc80906e chore(deps): update helm release kube-prometheus-stack to v80.14.3 2026-01-14 15:00:53 +00:00
3fd705520c chore(deps): update helm release kube-prometheus-stack to v80.14.2 2026-01-14 04:00:53 +00:00
94d65decd1 chore(deps): update helm release argo-cd to v9.3.3 2026-01-13 23:00:55 +00:00
e06a1be194 chore(deps): update helm release kube-prometheus-stack to v80.14.1 2026-01-13 21:01:40 +00:00
dc926c31de chore(deps): update helm release rook-ceph to v1.18.9 2026-01-13 20:00:23 +00:00
af31507e8c Merge branch 'renovate/argo-cd-9.x' 2026-01-13 11:28:24 -05:00
c0ca549393 Merge branch 'renovate/kube-prometheus-stack-80.x' 2026-01-13 11:26:09 -05:00
a113c84c9d chore(deps): update helm release kube-prometheus-stack to v80.14.0 2026-01-13 16:00:44 +00:00
a7cc46ed8a chore(deps): update helm release argo-cd to v9.3.1 2026-01-13 11:00:29 +00:00
54e6a76aab chore(deps): update helm release kube-prometheus-stack to v80.13.3 2026-01-09 10:00:58 +00:00
33ef2866e9 chore(deps): update helm release traefik to v38.0.2 2026-01-08 09:00:26 +00:00
b609e87dd3 chore(deps): update helm release kube-prometheus-stack to v80.13.2 2026-01-07 23:00:59 +00:00
e1ffafc161 chore(deps): update helm release kube-prometheus-stack to v80.13.1 2026-01-07 18:00:29 +00:00
4170dfa26c Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.13.0' (#180) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #180
2026-01-06 22:56:17 +00:00
5fcb92ee8b chore(deps): update helm release kube-prometheus-stack to v80.13.0 2026-01-06 22:00:55 +00:00
c5acc2416f chore(deps): update helm release velero to v11.3.2 2026-01-06 12:00:56 +00:00
87b667b2ab chore(deps): update helm release kube-prometheus-stack to v80.11.1 2026-01-06 11:00:55 +00:00
d68d2db3bc Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.11.0' (#177) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #177
2026-01-06 01:46:43 +00:00
ad68a17eb5 Merge pull request 'chore(deps): update helm release kite to v0.7.7' (#176) from renovate/kite-0.x into main
Reviewed-on: #176
2026-01-06 01:46:28 +00:00
b07c7bf3a0 chore(deps): update helm release kube-prometheus-stack to v80.11.0 2026-01-05 15:00:58 +00:00
78fc45ae6c chore(deps): update helm release kite to v0.7.7 2026-01-05 13:00:27 +00:00
2fa1594e99 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.10.0' (#173) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #173
2026-01-03 16:53:26 +00:00
b211327516 chore(deps): update helm release external-secrets to v1.2.1 2026-01-02 23:00:55 +00:00
6885ec790c chore(deps): update helm release argo-cd to v9.2.4 2026-01-02 23:00:27 +00:00
664cace62e chore(deps): update helm release kube-prometheus-stack to v80.10.0 2026-01-02 18:00:26 +00:00
dae06b2c05 chore(deps): update helm release kube-prometheus-stack to v80.9.2 2025-12-31 11:00:57 +00:00
583831273d kite: create volume backups 2025-12-31 00:15:20 -05:00
f327b23001 try automerging patches 2025-12-31 00:01:26 -05:00
6f2603d3a0 remove redundant node alerts 2025-12-30 23:50:18 -05:00
c26ea4e139 Merge pull request 'chore(deps): update helm release velero to v11.3.1' (#170) from renovate/velero-11.x into main
Reviewed-on: #170
2025-12-31 04:44:22 +00:00
b521924f00 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.9.1' (#171) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #171
2025-12-31 04:44:07 +00:00
19f203e374 re-add weyma-talos-cp04 2025-12-30 23:43:11 -05:00
bb251462fb update omni cluster template 2025-12-30 22:44:13 -05:00
9a9d108e7c chore(deps): update helm release kube-prometheus-stack to v80.9.1 2025-12-30 19:00:56 +00:00
70d5ae2e48 chore(deps): update helm release velero to v11.3.1 2025-12-29 11:00:24 +00:00
e6e25baee1 Merge pull request 'chore(deps): update helm release argo-cd to v9.2.3' (#168) from renovate/argo-cd-9.x into main
Reviewed-on: #168
2025-12-29 00:55:53 +00:00
a08e9930d5 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.8.0' (#169) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #169
2025-12-29 00:55:41 +00:00
94bb98b4ed chore(deps): update helm release argo-cd to v9.2.3 2025-12-28 13:00:26 +00:00
07f863b0a7 chore(deps): update helm release kube-prometheus-stack to v80.8.0 2025-12-28 01:08:47 +00:00
79669aaf16 Merge pull request 'chore(deps): update helm release external-secrets to v1.2.0' (#164) from renovate/external-secrets-1.x into main
Reviewed-on: #164
2025-12-23 20:45:46 +00:00
7237e23151 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.6.0' (#165) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #165
2025-12-23 20:45:33 +00:00
f4cc060de7 Merge pull request 'chore(deps): update helm release argo-cd to v9.2.0' (#166) from renovate/argo-cd-9.x into main
Reviewed-on: #166
2025-12-23 20:44:27 +00:00
15f5cb1cbc Merge pull request 'chore(deps): update helm release kite to v0.7.6' (#167) from renovate/kite-0.x into main
Reviewed-on: #167
2025-12-23 20:44:16 +00:00
373b418601 chore(deps): update helm release kite to v0.7.6 2025-12-23 17:03:22 +00:00
95af55533e chore(deps): update helm release argo-cd to v9.2.0 2025-12-23 12:00:27 +00:00
24b29cc9a9 consolidate everything into main file, needed for import 2025-12-22 16:49:31 -05:00
ba292377ab derp 2025-12-20 17:19:37 -05:00
9ee0e419a0 fix dispatcharr route 2025-12-20 17:16:49 -05:00
0ee35ec27c im tar 2025-12-20 17:13:04 -05:00
9697736ed3 traefik: add rule to dispatcharr route 2025-12-20 17:10:35 -05:00
1dea2edfcc traefik: add static route for dispatcharr 2025-12-20 17:00:13 -05:00
ae3d90eb10 chore(deps): update helm release kube-prometheus-stack to v80.6.0 2025-12-19 16:00:23 +00:00
72e16276b8 chore(deps): update helm release external-secrets to v1.2.0 2025-12-19 15:00:19 +00:00
f1fe246f14 Merge pull request 'chore(deps): update helm release traefik to v38' (#161) from renovate/traefik-38.x into main
Reviewed-on: #161
2025-12-19 14:59:37 +00:00
afe3aaf866 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.5.0' (#163) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #163
2025-12-19 14:58:25 +00:00
603b6fdbd3 Merge pull request 'chore(deps): update helm release argo-cd to v9.1.9' (#162) from renovate/argo-cd-9.x into main
Reviewed-on: #162
2025-12-19 14:56:05 +00:00
b4fa24c8d1 chore(deps): update helm release traefik to v38 2025-12-19 14:01:27 +00:00
1c344f11c4 chore(deps): update helm release kube-prometheus-stack to v80.5.0 2025-12-18 16:01:00 +00:00
a9f1a7cf69 chore(deps): update helm release argo-cd to v9.1.9 2025-12-18 13:00:55 +00:00
335563a895 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.4.2' (#160) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #160
2025-12-17 15:34:06 +00:00
72df8103f2 Merge pull request 'chore(deps): update helm release argo-cd to v9.1.8' (#159) from renovate/argo-cd-9.x into main
Reviewed-on: #159
2025-12-17 15:33:19 +00:00
22dbbaf64f chore(deps): update helm release kube-prometheus-stack to v80.4.2 2025-12-17 15:01:09 +00:00
f926df6bea enforce replicas for discord alertmanager agent 2025-12-17 09:19:15 -05:00
ecba2195b6 chore(deps): update helm release argo-cd to v9.1.8 2025-12-16 02:00:20 +00:00
da770facc5 Merge pull request 'create omni cluster template' (#158) from omni-templates into main
Reviewed-on: #158
2025-12-13 22:34:05 +00:00
f0dad1e033 create omni cluster template 2025-12-13 12:48:56 -05:00
ec154c641f Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v80.4.1' (#157) from renovate/kube-prometheus-stack-80.x into main
Reviewed-on: #157
2025-12-13 12:00:17 +00:00
f5405bf44d chore(deps): update helm release kube-prometheus-stack to v80.4.1 2025-12-12 23:00:28 +00:00
14 changed files with 522 additions and 75 deletions

492
omni/weyma-talos.yaml Normal file
View File

@@ -0,0 +1,492 @@
kind: Cluster
name: weyma-talos
kubernetes:
version: v1.34.2
talos:
version: v1.11.5
features:
backupConfiguration:
interval: 6h0m0s
patches:
- idOverride: 500-5100c0c3-f72e-45f5-8cde-4a1c3b6f72a8
annotations:
description: pod-svc-subnets
name: User defined patch
inline:
cluster:
network:
podSubnets:
- 10.244.0.0/16
serviceSubnets:
- 10.112.0.0/12
- idOverride: 500-7c228773-8b44-40b0-8b4c-30f617668af0
annotations:
description: weyma-image-cache
name: User defined patch
inline:
machine:
registries:
mirrors:
docker.io:
endpoints:
- http://10.105.6.215:6000
factory.talos.dev:
endpoints:
- http://10.105.6.215:6004
gcr.io:
endpoints:
- http://10.105.6.215:6002
ghcr.io:
endpoints:
- http://10.105.6.215:6003
registry.k8s.io:
endpoints:
- http://10.105.6.215:6001
- idOverride: 500-f198cacc-280b-4874-a410-252c160621a7
annotations:
name: weyma-bind-addr
inline:
cluster:
controllerManager:
extraArgs:
bind-address: 0.0.0.0
proxy:
extraArgs:
metrics-bind-address: 0.0.0.0:10249
scheduler:
extraArgs:
bind-address: 0.0.0.0
- idOverride: 500-fc113705-0777-4b52-8df0-7cee67fcc68e
annotations:
name: weyma-bootstrap-metrics
inline:
cluster:
extraManifests:
- https://raw.githubusercontent.com/alex1989hu/kubelet-serving-cert-approver/main/deploy/standalone-install.yaml
inlineManifests:
- contents: |-
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
k8s-app: metrics-server
rbac.authorization.k8s.io/aggregate-to-admin: "true"
rbac.authorization.k8s.io/aggregate-to-edit: "true"
rbac.authorization.k8s.io/aggregate-to-view: "true"
name: system:aggregated-metrics-reader
rules:
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
k8s-app: metrics-server
name: system:metrics-server
rules:
- apiGroups:
- ""
resources:
- nodes/metrics
verbs:
- get
- apiGroups:
- ""
resources:
- pods
- nodes
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
k8s-app: metrics-server
name: metrics-server-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
k8s-app: metrics-server
name: metrics-server:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
k8s-app: metrics-server
name: system:metrics-server
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:metrics-server
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
spec:
ports:
- appProtocol: https
name: https
port: 443
protocol: TCP
targetPort: https
selector:
k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
spec:
selector:
matchLabels:
k8s-app: metrics-server
strategy:
rollingUpdate:
maxUnavailable: 0
template:
metadata:
labels:
k8s-app: metrics-server
spec:
containers:
- args:
- --cert-dir=/tmp
- --secure-port=10250
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls
image: registry.k8s.io/metrics-server/metrics-server:v0.8.0
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 3
httpGet:
path: /livez
port: https
scheme: HTTPS
periodSeconds: 10
name: metrics-server
ports:
- containerPort: 10250
name: https
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /readyz
port: https
scheme: HTTPS
initialDelaySeconds: 20
periodSeconds: 10
resources:
requests:
cpu: 100m
memory: 200Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
volumeMounts:
- mountPath: /tmp
name: tmp-dir
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
serviceAccountName: metrics-server
volumes:
- emptyDir: {}
name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
labels:
k8s-app: metrics-server
name: v1beta1.metrics.k8s.io
spec:
group: metrics.k8s.io
groupPriorityMinimum: 100
insecureSkipTLSVerify: true
service:
name: metrics-server
namespace: kube-system
version: v1beta1
versionPriority: 100
name: metrics-server
- contents: |-
apiVersion: v1
kind: Service
metadata:
name: metrics-lb
namespace: kube-system
annotations:
metallb.io/ip-allocated-from-pool: test-pool
spec:
type: LoadBalancer
ports:
- name: https
port: 443
protocol: TCP
targetPort: https
selector:
k8s-app: metrics-server
name: metrics-lb
---
kind: ControlPlane
machines:
- 20b4c826-e699-43b3-826d-73eb5173680b
- 5fdea709-56ad-45f2-966d-5e344dbe4fdf
- 30303031-3030-3030-6335-303731636665
---
kind: Workers
machines:
- 02c02200-f403-11ef-9372-70f446672600
- 03000200-0400-0500-0006-000700080009
- 1006b91a-ecbf-11ea-aed4-046ba1ee3700
- 5f0cd701-0784-4fcc-8e52-3b3304049972
- da507021-8912-4337-86a3-94a05bd1cf05
---
kind: Machine
name: 02c02200-f403-11ef-9372-70f446672600
patches:
- idOverride: 400-cm-02c02200-f403-11ef-9372-70f446672600
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-w02
interfaces:
- deviceSelector:
driver: igc
hardwareAddr: e8:ff:1e:d4:b8:89
dhcp: true
vlans:
- dhcp: false
vlanId: 50
- deviceSelector:
hardwareAddr: e8:ff:1e:d4:b8:8a
dhcp: true
mtu: 9000
- bridge:
interfaces:
- enp1s0.50
dhcp: false
interface: br0
---
kind: Machine
name: 03000200-0400-0500-0006-000700080009
patches:
- idOverride: 400-cm-03000200-0400-0500-0006-000700080009
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-testw01
interfaces:
- deviceSelector:
driver: igc
hardwareAddr: e8:ff:1e:d5:f8:22
dhcp: true
vlans:
- dhcp: false
vlanId: 50
- deviceSelector:
hardwareAddr: e8:ff:1e:d5:f8:21
dhcp: true
mtu: 9000
- bridge:
interfaces:
- enp2s0.50
dhcp: false
interface: br0
---
kind: Machine
name: 1006b91a-ecbf-11ea-aed4-046ba1ee3700
patches:
- idOverride: 400-cm-1006b91a-ecbf-11ea-aed4-046ba1ee3700
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-testw04
interfaces:
- deviceSelector:
driver: mlx4_core
hardwareAddr: f4:52:14:60:5e:30
dhcp: true
vlans:
- dhcp: false
vlanId: 50
- deviceSelector:
hardwareAddr: f4:52:14:60:5e:31
dhcp: true
mtu: 9000
- bridge:
interfaces:
- eno1.50
dhcp: false
interface: br0
---
kind: Machine
name: 30303031-3030-3030-6335-303731636665
patches:
- idOverride: 400-cm-30303031-3030-3030-6335-303731636665
inline:
machine:
network:
hostname: weyma-talos-cp04
interfaces:
- deviceSelector:
hardwareAddr: dc:a6:32:95:0f:cb
dhcp: true
---
kind: Machine
name: 20b4c826-e699-43b3-826d-73eb5173680b
patches:
- idOverride: 400-cm-20b4c826-e699-43b3-826d-73eb5173680b
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-cp02
interfaces:
- deviceSelector:
driver: virtio*
hardwareAddr: 00:16:3e:9c:01:27
dhcp: true
---
kind: Machine
name: 5f0cd701-0784-4fcc-8e52-3b3304049972
patches:
- idOverride: 400-cm-5f0cd701-0784-4fcc-8e52-3b3304049972
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-testw05
interfaces:
- deviceSelector:
hardwareAddr: 00:16:3e:b3:dd:f8
dhcp: true
- deviceSelector:
hardwareAddr: 00:16:3e:e5:79:0a
dhcp: true
mtu: 9000
- deviceSelector:
hardwareAddr: 00:16:3e:6b:1c:1d
dhcp: false
- bridge:
interfaces:
- enx00163e6b1c1d
dhcp: false
interface: br0
---
kind: Machine
systemExtensions:
- siderolabs/nut-client
- siderolabs/qemu-guest-agent
name: 5fdea709-56ad-45f2-966d-5e344dbe4fdf
patches:
- idOverride: 400-cm-5fdea709-56ad-45f2-966d-5e344dbe4fdf
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-cp01
interfaces:
- deviceSelector:
driver: virtio*
hardwareAddr: bc:24:11:e6:ff:7b
dhcp: true
---
kind: Machine
name: da507021-8912-4337-86a3-94a05bd1cf05
patches:
- idOverride: 400-cm-da507021-8912-4337-86a3-94a05bd1cf05
annotations:
name: ""
inline:
machine:
network:
hostname: weyma-talos-w03
interfaces:
- deviceSelector:
driver: virtio*
hardwareAddr: bc:24:11:be:6c:08
dhcp: true
- deviceSelector:
driver: virtio*
hardwareAddr: bc:24:11:f8:4a:92
dhcp: true
mtu: 8996
- deviceSelector:
driver: virtio*
hardwareAddr: bc:24:11:93:02:0e
dhcp: false
- bridge:
interfaces:
- enxbc241193020e
dhcp: false
interface: br0

View File

@@ -14,6 +14,12 @@
}
],
"packageRules": [
{
"description": "Automerge patch updates",
"matchUpdateTypes": ["patch"],
"matchCurrentVersion": "!/^0/",
"automerge": true
},
{
"description": "Rook Ceph - auto-update minor and patch versions only",
"matchDatasources": ["docker"],

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: argo-cd
version: 9.1.7
version: 9.3.4
repository: https://argoproj.github.io/argo-helm

View File

@@ -56,18 +56,6 @@ argo-cd:
Argo CD has not reported any applications data for the past 15 minutes which
means that it must be down or not functioning properly. This needs to be
resolved for this cloud to continue to maintain state.
- alert: ArgoAppNotSynced
expr: |
argocd_app_info{sync_status!="Synced"} == 1
for: 12h
labels:
severity: warning
annotations:
summary: '{{ $labels.name }} Application not synchronized'
description: >
The application {{ $labels.name }} has not been synchronized for over
12 hours which means that the state of this cloud has drifted away from the
state inside Git.
server:
ingress:
enabled: true

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: external-secrets
version: 1.1.1
version: 1.2.1
repository: https://charts.external-secrets.io

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: kite
version: 0.7.5
version: 0.7.7
repository: https://zxh326.github.io/kite

View File

@@ -15,4 +15,6 @@ kite:
- host: weyma-kite.infra.dubyatp.xyz
paths:
- path: /
pathType: ImplementationSpecific
pathType: ImplementationSpecific
podAnnotations:
backup.velero.io/backup-volumes: kite-storage

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: kube-prometheus-stack
version: 80.2.2
version: 80.14.4
repository: https://prometheus-community.github.io/helm-charts

View File

@@ -9,6 +9,7 @@ metadata:
app.kubernetes.io/name: alertmanager-discord
app.kubernetes.io/instance: {{ .Release.Name }}
spec:
replicas: 1
selector:
matchLabels:
app: alertmanager-discord

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: rook-ceph
version: v1.18.8
version: v1.18.9
repository: https://charts.rook.io/release

View File

@@ -497,61 +497,6 @@ spec:
oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
severity: "critical"
type: "ceph_default"
- alert: "CephNodeNetworkPacketDrops"
annotations:
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
summary: "One or more NICs reports packet drops"
expr: |
(
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
rate(node_network_transmit_drop_total{device!="lo"}[1m])
) / (
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
rate(node_network_transmit_packets_total{device!="lo"}[1m])
) >= 0.0050000000000000001 and (
rate(node_network_receive_drop_total{device!="lo"}[1m]) +
rate(node_network_transmit_drop_total{device!="lo"}[1m])
) >= 10
labels:
oid: "1.3.6.1.4.1.50495.1.2.1.8.2"
severity: "warning"
type: "ceph_default"
- alert: "CephNodeNetworkPacketErrors"
annotations:
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}."
summary: "One or more NICs reports packet errors"
expr: |
(
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
rate(node_network_transmit_errs_total{device!="lo"}[1m])
) / (
rate(node_network_receive_packets_total{device!="lo"}[1m]) +
rate(node_network_transmit_packets_total{device!="lo"}[1m])
) >= 0.0001 or (
rate(node_network_receive_errs_total{device!="lo"}[1m]) +
rate(node_network_transmit_errs_total{device!="lo"}[1m])
) >= 10
labels:
oid: "1.3.6.1.4.1.50495.1.2.1.8.3"
severity: "warning"
type: "ceph_default"
- alert: "CephNodeNetworkBondDegraded"
annotations:
description: "Bond {{ "{{" }} $labels.master {{ "}}" }} is degraded on Node {{ "{{" }} $labels.instance {{ "}}" }}."
summary: "Degraded Bond on Node {{ "{{" }} $labels.instance {{ "}}" }}"
expr: |
node_bonding_slaves - node_bonding_active != 0
labels:
severity: "warning"
type: "ceph_default"
- alert: "CephNodeInconsistentMTU"
annotations:
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}."
summary: "MTU settings across Ceph hosts are inconsistent"
expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )"
labels:
severity: "warning"
type: "ceph_default"
- name: "pools"
rules:
- alert: "CephPoolGrowthWarning"

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: traefik
version: 37.4.0
version: 38.0.2
repository: https://traefik.github.io/charts

View File

@@ -81,6 +81,19 @@ traefik:
traefik-real-ip:
excludednets:
- "1.1.1.1/24"
routers:
dispatcharr:
entryPoints:
- websecure
service: dispatcharr
tls:
options: default
rule: 'Host(`dispatcharr.dubyatp.xyz`) && PathPrefix(`/`)'
services:
dispatcharr:
loadBalancer:
servers:
- url: http://10.105.15.20:9191
service:
spec:
externalTrafficPolicy: Local

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: velero
version: 11.2.0
version: 11.3.2
repository: https://vmware-tanzu.github.io/helm-charts