Compare commits

..

1 Commits

16 changed files with 21 additions and 170 deletions

View File

@@ -1,37 +0,0 @@
# Main Infrastructure: weyma-talos
**Production Kubernetes infrastructure with disaster recovery capabilities**
This repository contains the foundational infrastructure for my Kubernetes homelab, designed with reliability and rapid recovery as core principles.
## Architecture
My infrastructure follows a layered "black start" approach — essential services run outside the Kubernetes cluster to enable cluster bootstrapping and recovery from total failures.
### Black Start Layer
Static services (Docker Compose on TrueNAS/Proxmox) that provide cluster dependencies:
- Image cache for faster deployments and offline capability
- Talos discovery server for node bootstrapping
- HashiCorp Vault for secrets management (external to cluster)
- Future: Self-hosted Sidero Omni server (migrating from SaaS)
### System Apps Layer
Applications running within Kubernetes that provide core cluster functionality, managed via ArgoCD with GitOps principles.
## Repository Structure
- **`black-start/`** - Docker Compose services for cluster dependencies
- **`config-patches/`** - Talos Linux configuration patches for cluster and individual machines
- **`omni/`** - Sidero Omni [cluster template](https://docs.siderolabs.com/omni/reference/cluster-templates)
- **`system-apps/`** - System applications (ArgoCD projects): monitoring, ingress, certificates, storage
## Tech Stack
**OS:** Talos Linux | **Orchestration:** Kubernetes | **GitOps:** ArgoCD | **Secrets:** Vault | **Storage:** Rook-Ceph
## Recovery Process
The "black start" architecture enables automated recovery from complete infrastructure failure in roughly 15–20 minutes:
1. Start black-start services → 2. Bootstrap Talos → 3. Deploy system apps → 4. Deploy core apps
For application deployments, see [core-apps](https://git.dubyatp.xyz/core-apps).

View File

@@ -2,7 +2,7 @@ version: "3.8"
services: services:
discovery: discovery:
restart: unless-stopped restart: unless-stopped
image: ghcr.io/siderolabs/discovery-service:v1.0.15 image: ghcr.io/siderolabs/discovery-service:v1.0.13
ports: ports:
- 10.105.6.215:3000:3000 - 10.105.6.215:3000:3000
- 10.105.6.215:3001:3001 - 10.105.6.215:3001:3001

View File

@@ -52,7 +52,6 @@ patches:
bind-address: 0.0.0.0 bind-address: 0.0.0.0
proxy: proxy:
extraArgs: extraArgs:
proxy-mode: ipvs
metrics-bind-address: 0.0.0.0:10249 metrics-bind-address: 0.0.0.0:10249
scheduler: scheduler:
extraArgs: extraArgs:
@@ -288,45 +287,6 @@ patches:
selector: selector:
k8s-app: metrics-server k8s-app: metrics-server
name: metrics-lb name: metrics-lb
- contents: |-
apiVersion: v1
data:
Corefile: |
.:53 {
errors
health {
lameduck 5s
}
ready
log . {
class error
}
prometheus :9153
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
rewrite name git.dubyatp.xyz traefik-local.traefik.svc.cluster.local
forward . /etc/resolv.conf {
max_concurrent 1000
}
cache 30 {
disable success cluster.local
disable denial cluster.local
}
loop
reload
loadbalance
}
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
name: coredns-config
--- ---
kind: ControlPlane kind: ControlPlane
machines: machines:

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: argo-cd - name: argo-cd
version: 9.4.6 version: 9.3.4
repository: https://argoproj.github.io/argo-helm repository: https://argoproj.github.io/argo-helm

View File

@@ -128,30 +128,18 @@ argo-cd:
remoteRef: remoteRef:
key: argo-cd key: argo-cd
property: webhook.gitea.secret property: webhook.gitea.secret
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: admin.password - secretKey: admin.password
remoteRef: remoteRef:
key: argo-cd key: argo-cd
property: admin.password property: admin.password
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: admin.passwordMtime - secretKey: admin.passwordMtime
remoteRef: remoteRef:
key: argo-cd key: argo-cd
property: admin.passwordMtime property: admin.passwordMtime
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: dex.authentik.clientSecret - secretKey: dex.authentik.clientSecret
remoteRef: remoteRef:
key: argo-cd key: argo-cd
property: dex.authentik.clientSecret property: dex.authentik.clientSecret
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1 - apiVersion: external-secrets.io/v1
kind: ExternalSecret kind: ExternalSecret
metadata: metadata:
@@ -172,23 +160,14 @@ argo-cd:
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: sshPrivateKey property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type - secretKey: type
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: type property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url - secretKey: url
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: url.core-apps property: url.core-apps
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1 - apiVersion: external-secrets.io/v1
kind: ExternalSecret kind: ExternalSecret
metadata: metadata:
@@ -209,23 +188,14 @@ argo-cd:
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: sshPrivateKey property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type - secretKey: type
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: type property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url - secretKey: url
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: url.weyma-talos property: url.weyma-talos
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1 - apiVersion: external-secrets.io/v1
kind: ExternalSecret kind: ExternalSecret
metadata: metadata:
@@ -246,23 +216,14 @@ argo-cd:
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: sshPrivateKey property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type - secretKey: type
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: type property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url - secretKey: url
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: url.williamp-sites property: url.williamp-sites
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- apiVersion: external-secrets.io/v1 - apiVersion: external-secrets.io/v1
kind: ExternalSecret kind: ExternalSecret
metadata: metadata:
@@ -283,20 +244,11 @@ argo-cd:
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: sshPrivateKey property: sshPrivateKey
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: type - secretKey: type
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: type property: type
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None
- secretKey: url - secretKey: url
remoteRef: remoteRef:
key: argo-cd-git key: argo-cd-git
property: url.db-operators property: url.db-operators
conversionStrategy: Default
decodingStrategy: None
metadataPolicy: None

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: cert-manager - name: cert-manager
version: v1.19.4 version: v1.19.2
repository: https://charts.jetstack.io repository: https://charts.jetstack.io

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: external-secrets - name: external-secrets
version: 2.0.1 version: 1.2.1
repository: https://charts.external-secrets.io repository: https://charts.external-secrets.io

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: kite - name: kite
version: 0.7.8 version: 0.7.7
repository: https://zxh326.github.io/kite repository: https://zxh326.github.io/kite

View File

@@ -1,7 +1,5 @@
kite: kite:
host: "https://weyma-kite.infra.dubyatp.xyz" host: "https://weyma-kite.infra.dubyatp.xyz"
deploymentStrategy:
type: Recreate
secret: secret:
create: false create: false
existingSecret: kite-secret existingSecret: kite-secret

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: kubernetes-replicator - name: kubernetes-replicator
version: 2.12.3 version: 2.12.2
repository: https://helm.mittwald.de repository: https://helm.mittwald.de

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: kube-prometheus-stack - name: kube-prometheus-stack
version: 82.4.3 version: 81.0.0
repository: https://prometheus-community.github.io/helm-charts repository: https://prometheus-community.github.io/helm-charts

View File

@@ -21,7 +21,7 @@ spec:
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/. # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
# If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.1-20250202 # If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.1-20250202
# This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
image: quay.io/ceph/ceph:v20.2.0-20251104 image: quay.io/ceph/ceph:v19.2.3-20250717
# Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported. # Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported.
# Future versions such as Tentacle (v20) would require this to be set to `true`. # Future versions such as Tentacle (v20) would require this to be set to `true`.
# Do not set to true in production. # Do not set to true in production.

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: rook-ceph - name: rook-ceph
version: v1.19.2 version: v1.18.9
repository: https://charts.rook.io/release repository: https://charts.rook.io/release

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: traefik - name: traefik
version: 39.0.2 version: 38.0.2
repository: https://traefik.github.io/charts repository: https://traefik.github.io/charts

View File

@@ -4,7 +4,6 @@ traefik:
- --entryPoints.websecure.transport.respondingTimeouts.readTimeout=0 - --entryPoints.websecure.transport.respondingTimeouts.readTimeout=0
ports: ports:
web: web:
http:
redirections: redirections:
entryPoint: entryPoint:
to: websecure to: websecure
@@ -15,6 +14,8 @@ traefik:
exposedPort: 22 exposedPort: 22
expose: expose:
default: true default: true
tls:
passthrough: true
metrics: metrics:
prometheus: prometheus:
service: service:
@@ -37,7 +38,7 @@ traefik:
kind: DaemonSet kind: DaemonSet
additionalContainers: additionalContainers:
- name: cloudflared - name: cloudflared
image: cloudflare/cloudflared:2026.2.0 image: cloudflare/cloudflared:2025.11.1
command: command:
- cloudflared - cloudflared
- tunnel - tunnel
@@ -129,26 +130,3 @@ traefik:
data: data:
tls.crt: "" tls.crt: ""
tls.key: "" tls.key: ""
- apiVersion: v1
kind: Service
metadata:
name: traefik-local
spec:
sessionAffinity: ClientIP
sessionAffinityConfig:
clientIP:
timeoutSeconds: 3600
selector:
app.kubernetes.io/name: traefik
app.kubernetes.io/instance: traefik-traefik
ports:
- name: gitssh
port: 22
targetPort: gitssh
- name: web
port: 80
targetPort: web
- name: websecure
port: 443
targetPort: websecure
type: ClusterIP

View File

@@ -24,5 +24,5 @@ appVersion: "1.0"
dependencies: dependencies:
- name: velero - name: velero
version: 11.4.0 version: 11.3.2
repository: https://vmware-tanzu.github.io/helm-charts repository: https://vmware-tanzu.github.io/helm-charts