Compare commits

..

1 Commits

Author SHA1 Message Date
49a5ee3971 chore(deps): update helm release argo-cd to v9.3.0 2026-01-12 14:00:27 +00:00
17 changed files with 34 additions and 171 deletions

View File

@@ -1,37 +0,0 @@
# Main Infrastructure: weyma-talos
**Production Kubernetes infrastructure with disaster recovery capabilities**
This repository contains the foundational infrastructure for my Kubernetes homelab, designed with reliability and rapid recovery as core principles.
## Architecture
My infrastructure follows a layered "black start" approach: essential services run outside the Kubernetes cluster so that the cluster can be bootstrapped, and recovered after a total failure, without depending on itself.
### Black Start Layer
Static services (Docker Compose on TrueNAS/Proxmox) that provide cluster dependencies:
- Image cache for faster deployments and offline capability
- Talos discovery server for node bootstrapping
- HashiCorp Vault for secrets management (external to cluster)
- Future: Self-hosted Sidero Omni server (migrating from SaaS)
### System Apps Layer
Applications running within Kubernetes that provide core cluster functionality, managed via ArgoCD with GitOps principles.
## Repository Structure
- **`black-start/`** - Docker Compose services for cluster dependencies
- **`config-patches/`** - Talos Linux configuration patches for cluster and individual machines
- **`omni/`** - Sidero Omni [cluster template](https://docs.siderolabs.com/omni/reference/cluster-templates)
- **`system-apps/`** - System applications (ArgoCD projects) - monitoring, ingress, certificates, storage
## Tech Stack
**OS:** Talos Linux | **Orchestration:** Kubernetes | **GitOps:** ArgoCD | **Secrets:** Vault | **Storage:** Rook-Ceph
## Recovery Process
The "black start" architecture enables automated recovery from a complete infrastructure failure in roughly 15–20 minutes:
1. Start black-start services → 2. Bootstrap Talos → 3. Deploy system apps → 4. Deploy core apps
For application deployments, see [core-apps](https://git.dubyatp.xyz/core-apps).

View File

@@ -2,7 +2,7 @@ version: "3.8"
services:
discovery:
restart: unless-stopped
image: ghcr.io/siderolabs/discovery-service:v1.0.15
image: ghcr.io/siderolabs/discovery-service:v1.0.13
ports:
- 10.105.6.215:3000:3000
- 10.105.6.215:3001:3001

View File

@@ -52,7 +52,6 @@ patches:
bind-address: 0.0.0.0
proxy:
extraArgs:
proxy-mode: ipvs
metrics-bind-address: 0.0.0.0:10249
scheduler:
extraArgs:
@@ -288,45 +287,6 @@ patches:
selector:
k8s-app: metrics-server
name: metrics-lb
- contents: |-
apiVersion: v1
data:
Corefile: |
.:53 {
errors
health {
lameduck 5s
}
ready
log . {
class error
}
prometheus :9153
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
rewrite name git.dubyatp.xyz traefik-local.traefik.svc.cluster.local
forward . /etc/resolv.conf {
max_concurrent 1000
}
cache 30 {
disable success cluster.local
disable denial cluster.local
}
loop
reload
loadbalance
}
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
name: coredns-config
---
kind: ControlPlane
machines:

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: argo-cd
version: 9.4.6
version: 9.3.0
repository: https://argoproj.github.io/argo-helm

View File

@@ -56,6 +56,18 @@ argo-cd:
Argo CD has not reported any applications data for the past 15 minutes which
means that it must be down or not functioning properly. This needs to be
resolved for this cloud to continue to maintain state.
- alert: ArgoAppNotSynced
expr: |
argocd_app_info{sync_status!="Synced"} == 1
for: 12h
labels:
severity: warning
annotations:
summary: '{{ $labels.name }} Application not synchronized'
description: >
The application {{ $labels.name }} has not been synchronized for over
12 hours which means that the state of this cloud has drifted away from the
state inside Git.
server:
ingress:
enabled: true
@@ -128,30 +140,18 @@ argo-cd:
remoteRef:
key: argo-cd
property: webhook.gitea.secret
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: admin.password
remoteRef:
key: argo-cd
property: admin.password
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: admin.passwordMtime
remoteRef:
key: argo-cd
property: admin.passwordMtime
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: dex.authentik.clientSecret
remoteRef:
key: argo-cd
property: dex.authentik.clientSecret
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
@@ -172,23 +172,14 @@ argo-cd:
remoteRef:
key: argo-cd-git
property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type
remoteRef:
key: argo-cd-git
property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url
remoteRef:
key: argo-cd-git
property: url.core-apps
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
@@ -209,23 +200,14 @@ argo-cd:
remoteRef:
key: argo-cd-git
property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type
remoteRef:
key: argo-cd-git
property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url
remoteRef:
key: argo-cd-git
property: url.weyma-talos
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
@@ -246,23 +228,14 @@ argo-cd:
remoteRef:
key: argo-cd-git
property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type
remoteRef:
key: argo-cd-git
property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url
remoteRef:
key: argo-cd-git
property: url.williamp-sites
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
@@ -283,20 +256,11 @@ argo-cd:
remoteRef:
key: argo-cd-git
property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type
remoteRef:
key: argo-cd-git
property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url
remoteRef:
key: argo-cd-git
property: url.db-operators
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: cert-manager
version: v1.19.4
version: v1.19.2
repository: https://charts.jetstack.io

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: external-secrets
version: 2.0.1
version: 1.2.1
repository: https://charts.external-secrets.io

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: kite
version: 0.7.8
version: 0.7.7
repository: https://zxh326.github.io/kite

View File

@@ -1,7 +1,5 @@
kite:
host: "https://weyma-kite.infra.dubyatp.xyz"
deploymentStrategy:
type: Recreate
secret:
create: false
existingSecret: kite-secret

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: kubernetes-replicator
version: 2.12.3
version: 2.12.2
repository: https://helm.mittwald.de

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: kube-prometheus-stack
version: 82.4.3
version: 80.13.3
repository: https://prometheus-community.github.io/helm-charts

View File

@@ -21,7 +21,7 @@ spec:
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
# If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.1-20250202
# A newer timestamp tag might not contain a new Ceph version, only security fixes from the underlying operating system, which reduce vulnerabilities.
image: quay.io/ceph/ceph:v20.2.0-20251104
image: quay.io/ceph/ceph:v19.2.3-20250717
# Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported.
# Future versions such as Tentacle (v20) would require this to be set to `true`.
# Do not set to true in production.

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: rook-ceph
version: v1.19.2
version: v1.18.8
repository: https://charts.rook.io/release

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: traefik
version: 39.0.2
version: 38.0.2
repository: https://traefik.github.io/charts

View File

@@ -4,17 +4,18 @@ traefik:
- --entryPoints.websecure.transport.respondingTimeouts.readTimeout=0
ports:
web:
http:
redirections:
entryPoint:
to: websecure
scheme: https
permanent: true
redirections:
entryPoint:
to: websecure
scheme: https
permanent: true
gitssh:
port: 2222
exposedPort: 22
expose:
default: true
tls:
passthrough: true
metrics:
prometheus:
service:
@@ -37,7 +38,7 @@ traefik:
kind: DaemonSet
additionalContainers:
- name: cloudflared
image: cloudflare/cloudflared:2026.2.0
image: cloudflare/cloudflared:2025.11.1
command:
- cloudflared
- tunnel
@@ -129,26 +130,3 @@ traefik:
data:
tls.crt: ""
tls.key: ""
- apiVersion: v1
kind: Service
metadata:
name: traefik-local
spec:
sessionAffinity: ClientIP
sessionAffinityConfig:
clientIP:
timeoutSeconds: 3600
selector:
app.kubernetes.io/name: traefik
app.kubernetes.io/instance: traefik-traefik
ports:
- name: gitssh
port: 22
targetPort: gitssh
- name: web
port: 80
targetPort: web
- name: websecure
port: 443
targetPort: websecure
type: ClusterIP

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies:
- name: velero
version: 11.4.0
version: 11.3.2
repository: https://vmware-tanzu.github.io/helm-charts

View File

@@ -59,7 +59,7 @@ velero:
insecureSkipTLSVerify: "true"
initContainers:
- name: velero-plugin-for-aws
image: velero/velero-plugin-for-aws:v1.13.2
image: velero/velero-plugin-for-aws:v1.13.1
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /target