Compare commits
1 Commits
main
...
f610bc789a
| Author | SHA1 | Date | |
|---|---|---|---|
|
f610bc789a
|
37
README.md
37
README.md
@@ -1,37 +0,0 @@
|
|||||||
# Main Infrastructure: weyma-talos
|
|
||||||
|
|
||||||
**Production Kubernetes infrastructure with disaster recovery capabilities**
|
|
||||||
|
|
||||||
This repository contains the foundational infrastructure for my Kubernetes homelab, designed with reliability and rapid recovery as core principles.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
My infrastructure follows a layered "black start" approach - essential services run outside the Kubernetes cluster to enable cluster bootstrapping and recovery from total failures.
|
|
||||||
|
|
||||||
### Black Start Layer
|
|
||||||
Static services (Docker Compose on TrueNAS/Proxmox) that provide cluster dependencies:
|
|
||||||
- Image cache for faster deployments and offline capability
|
|
||||||
- Talos discovery server for node bootstrapping
|
|
||||||
- HashiCorp Vault for secrets management (external to cluster)
|
|
||||||
- Future: Self-hosted Sidero Omni server (migrating from SaaS)
|
|
||||||
|
|
||||||
### System Apps Layer
|
|
||||||
Applications running within Kubernetes that provide core cluster functionality, managed via ArgoCD with GitOps principles.
|
|
||||||
|
|
||||||
## Repository Structure
|
|
||||||
|
|
||||||
- **`black-start/`** - Docker Compose services for cluster dependencies
|
|
||||||
- **`config-patches/`** - Talos Linux configuration patches for cluster and individual machines
|
|
||||||
- **`omni/`** - Sidero Omni [cluster template](https://docs.siderolabs.com/omni/reference/cluster-templates)
|
|
||||||
- **`system-apps/`** - System applications (ArgoCD projects) - monitoring, ingress, certificates, storage
|
|
||||||
|
|
||||||
## Tech Stack
|
|
||||||
|
|
||||||
**OS:** Talos Linux | **Orchestration:** Kubernetes | **GitOps:** ArgoCD | **Secrets:** Vault | **Storage:** Rook-Ceph
|
|
||||||
|
|
||||||
## Recovery Process
|
|
||||||
|
|
||||||
The "black start" architecture enables ~15-20 minute automated recovery from complete infrastructure failure:
|
|
||||||
1. Start black-start services → 2. Bootstrap Talos → 3. Deploy system apps → 4. Deploy core apps
|
|
||||||
|
|
||||||
For application deployments, see [core-apps](https://git.dubyatp.xyz/core-apps).
|
|
||||||
@@ -2,7 +2,7 @@ version: "3.8"
|
|||||||
services:
|
services:
|
||||||
discovery:
|
discovery:
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
image: ghcr.io/siderolabs/discovery-service:v1.0.15
|
image: ghcr.io/siderolabs/discovery-service:v1.0.13
|
||||||
ports:
|
ports:
|
||||||
- 10.105.6.215:3000:3000
|
- 10.105.6.215:3000:3000
|
||||||
- 10.105.6.215:3001:3001
|
- 10.105.6.215:3001:3001
|
||||||
@@ -52,7 +52,6 @@ patches:
|
|||||||
bind-address: 0.0.0.0
|
bind-address: 0.0.0.0
|
||||||
proxy:
|
proxy:
|
||||||
extraArgs:
|
extraArgs:
|
||||||
proxy-mode: ipvs
|
|
||||||
metrics-bind-address: 0.0.0.0:10249
|
metrics-bind-address: 0.0.0.0:10249
|
||||||
scheduler:
|
scheduler:
|
||||||
extraArgs:
|
extraArgs:
|
||||||
@@ -288,45 +287,6 @@ patches:
|
|||||||
selector:
|
selector:
|
||||||
k8s-app: metrics-server
|
k8s-app: metrics-server
|
||||||
name: metrics-lb
|
name: metrics-lb
|
||||||
- contents: |-
|
|
||||||
apiVersion: v1
|
|
||||||
data:
|
|
||||||
Corefile: |
|
|
||||||
.:53 {
|
|
||||||
errors
|
|
||||||
health {
|
|
||||||
lameduck 5s
|
|
||||||
}
|
|
||||||
ready
|
|
||||||
log . {
|
|
||||||
class error
|
|
||||||
}
|
|
||||||
prometheus :9153
|
|
||||||
|
|
||||||
kubernetes cluster.local in-addr.arpa ip6.arpa {
|
|
||||||
pods insecure
|
|
||||||
fallthrough in-addr.arpa ip6.arpa
|
|
||||||
ttl 30
|
|
||||||
}
|
|
||||||
|
|
||||||
rewrite name git.dubyatp.xyz traefik-local.traefik.svc.cluster.local
|
|
||||||
|
|
||||||
forward . /etc/resolv.conf {
|
|
||||||
max_concurrent 1000
|
|
||||||
}
|
|
||||||
cache 30 {
|
|
||||||
disable success cluster.local
|
|
||||||
disable denial cluster.local
|
|
||||||
}
|
|
||||||
loop
|
|
||||||
reload
|
|
||||||
loadbalance
|
|
||||||
}
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: coredns
|
|
||||||
namespace: kube-system
|
|
||||||
name: coredns-config
|
|
||||||
---
|
---
|
||||||
kind: ControlPlane
|
kind: ControlPlane
|
||||||
machines:
|
machines:
|
||||||
|
|||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: argo-cd
|
- name: argo-cd
|
||||||
version: 9.4.6
|
version: 9.3.4
|
||||||
repository: https://argoproj.github.io/argo-helm
|
repository: https://argoproj.github.io/argo-helm
|
||||||
@@ -128,30 +128,18 @@ argo-cd:
|
|||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd
|
key: argo-cd
|
||||||
property: webhook.gitea.secret
|
property: webhook.gitea.secret
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: admin.password
|
- secretKey: admin.password
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd
|
key: argo-cd
|
||||||
property: admin.password
|
property: admin.password
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: admin.passwordMtime
|
- secretKey: admin.passwordMtime
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd
|
key: argo-cd
|
||||||
property: admin.passwordMtime
|
property: admin.passwordMtime
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: dex.authentik.clientSecret
|
- secretKey: dex.authentik.clientSecret
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd
|
key: argo-cd
|
||||||
property: dex.authentik.clientSecret
|
property: dex.authentik.clientSecret
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- apiVersion: external-secrets.io/v1
|
- apiVersion: external-secrets.io/v1
|
||||||
kind: ExternalSecret
|
kind: ExternalSecret
|
||||||
metadata:
|
metadata:
|
||||||
@@ -172,23 +160,14 @@ argo-cd:
|
|||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: sshPrivateKey
|
property: sshPrivateKey
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: type
|
- secretKey: type
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: type
|
property: type
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: url
|
- secretKey: url
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: url.core-apps
|
property: url.core-apps
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- apiVersion: external-secrets.io/v1
|
- apiVersion: external-secrets.io/v1
|
||||||
kind: ExternalSecret
|
kind: ExternalSecret
|
||||||
metadata:
|
metadata:
|
||||||
@@ -209,23 +188,14 @@ argo-cd:
|
|||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: sshPrivateKey
|
property: sshPrivateKey
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: type
|
- secretKey: type
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: type
|
property: type
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: url
|
- secretKey: url
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: url.weyma-talos
|
property: url.weyma-talos
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- apiVersion: external-secrets.io/v1
|
- apiVersion: external-secrets.io/v1
|
||||||
kind: ExternalSecret
|
kind: ExternalSecret
|
||||||
metadata:
|
metadata:
|
||||||
@@ -246,23 +216,14 @@ argo-cd:
|
|||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: sshPrivateKey
|
property: sshPrivateKey
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: type
|
- secretKey: type
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: type
|
property: type
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: url
|
- secretKey: url
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: url.williamp-sites
|
property: url.williamp-sites
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- apiVersion: external-secrets.io/v1
|
- apiVersion: external-secrets.io/v1
|
||||||
kind: ExternalSecret
|
kind: ExternalSecret
|
||||||
metadata:
|
metadata:
|
||||||
@@ -283,20 +244,11 @@ argo-cd:
|
|||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: sshPrivateKey
|
property: sshPrivateKey
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: type
|
- secretKey: type
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: type
|
property: type
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
- secretKey: url
|
- secretKey: url
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: argo-cd-git
|
key: argo-cd-git
|
||||||
property: url.db-operators
|
property: url.db-operators
|
||||||
conversionStrategy: Default
|
|
||||||
decodingStrategy: None
|
|
||||||
metadataPolicy: None
|
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: cert-manager
|
- name: cert-manager
|
||||||
version: v1.19.4
|
version: v1.19.2
|
||||||
repository: https://charts.jetstack.io
|
repository: https://charts.jetstack.io
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: external-secrets
|
- name: external-secrets
|
||||||
version: 2.0.1
|
version: 1.2.1
|
||||||
repository: https://charts.external-secrets.io
|
repository: https://charts.external-secrets.io
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: kite
|
- name: kite
|
||||||
version: 0.7.8
|
version: 0.7.7
|
||||||
repository: https://zxh326.github.io/kite
|
repository: https://zxh326.github.io/kite
|
||||||
@@ -1,7 +1,5 @@
|
|||||||
kite:
|
kite:
|
||||||
host: "https://weyma-kite.infra.dubyatp.xyz"
|
host: "https://weyma-kite.infra.dubyatp.xyz"
|
||||||
deploymentStrategy:
|
|
||||||
type: Recreate
|
|
||||||
secret:
|
secret:
|
||||||
create: false
|
create: false
|
||||||
existingSecret: kite-secret
|
existingSecret: kite-secret
|
||||||
|
|||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: kubernetes-replicator
|
- name: kubernetes-replicator
|
||||||
version: 2.12.3
|
version: 2.12.2
|
||||||
repository: https://helm.mittwald.de
|
repository: https://helm.mittwald.de
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: kube-prometheus-stack
|
- name: kube-prometheus-stack
|
||||||
version: 82.4.3
|
version: 81.0.0
|
||||||
repository: https://prometheus-community.github.io/helm-charts
|
repository: https://prometheus-community.github.io/helm-charts
|
||||||
@@ -21,7 +21,7 @@ spec:
|
|||||||
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
|
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
|
||||||
# If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.1-20250202
|
# If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.1-20250202
|
||||||
# This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
|
# This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
|
||||||
image: quay.io/ceph/ceph:v20.2.0-20251104
|
image: quay.io/ceph/ceph:v19.2.3-20250717
|
||||||
# Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported.
|
# Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported.
|
||||||
# Future versions such as Tentacle (v20) would require this to be set to `true`.
|
# Future versions such as Tentacle (v20) would require this to be set to `true`.
|
||||||
# Do not set to true in production.
|
# Do not set to true in production.
|
||||||
|
|||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: rook-ceph
|
- name: rook-ceph
|
||||||
version: v1.19.2
|
version: v1.18.9
|
||||||
repository: https://charts.rook.io/release
|
repository: https://charts.rook.io/release
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: traefik
|
- name: traefik
|
||||||
version: 39.0.2
|
version: 38.0.2
|
||||||
repository: https://traefik.github.io/charts
|
repository: https://traefik.github.io/charts
|
||||||
@@ -4,17 +4,18 @@ traefik:
|
|||||||
- --entryPoints.websecure.transport.respondingTimeouts.readTimeout=0
|
- --entryPoints.websecure.transport.respondingTimeouts.readTimeout=0
|
||||||
ports:
|
ports:
|
||||||
web:
|
web:
|
||||||
http:
|
redirections:
|
||||||
redirections:
|
entryPoint:
|
||||||
entryPoint:
|
to: websecure
|
||||||
to: websecure
|
scheme: https
|
||||||
scheme: https
|
permanent: true
|
||||||
permanent: true
|
|
||||||
gitssh:
|
gitssh:
|
||||||
port: 2222
|
port: 2222
|
||||||
exposedPort: 22
|
exposedPort: 22
|
||||||
expose:
|
expose:
|
||||||
default: true
|
default: true
|
||||||
|
tls:
|
||||||
|
passthrough: true
|
||||||
metrics:
|
metrics:
|
||||||
prometheus:
|
prometheus:
|
||||||
service:
|
service:
|
||||||
@@ -37,7 +38,7 @@ traefik:
|
|||||||
kind: DaemonSet
|
kind: DaemonSet
|
||||||
additionalContainers:
|
additionalContainers:
|
||||||
- name: cloudflared
|
- name: cloudflared
|
||||||
image: cloudflare/cloudflared:2026.2.0
|
image: cloudflare/cloudflared:2025.11.1
|
||||||
command:
|
command:
|
||||||
- cloudflared
|
- cloudflared
|
||||||
- tunnel
|
- tunnel
|
||||||
@@ -128,27 +129,4 @@ traefik:
|
|||||||
replicator.v1.mittwald.de/replicated-keys: "tls.crt,tls.key"
|
replicator.v1.mittwald.de/replicated-keys: "tls.crt,tls.key"
|
||||||
data:
|
data:
|
||||||
tls.crt: ""
|
tls.crt: ""
|
||||||
tls.key: ""
|
tls.key: ""
|
||||||
- apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: traefik-local
|
|
||||||
spec:
|
|
||||||
sessionAffinity: ClientIP
|
|
||||||
sessionAffinityConfig:
|
|
||||||
clientIP:
|
|
||||||
timeoutSeconds: 3600
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: traefik
|
|
||||||
app.kubernetes.io/instance: traefik-traefik
|
|
||||||
ports:
|
|
||||||
- name: gitssh
|
|
||||||
port: 22
|
|
||||||
targetPort: gitssh
|
|
||||||
- name: web
|
|
||||||
port: 80
|
|
||||||
targetPort: web
|
|
||||||
- name: websecure
|
|
||||||
port: 443
|
|
||||||
targetPort: websecure
|
|
||||||
type: ClusterIP
|
|
||||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: velero
|
- name: velero
|
||||||
version: 11.4.0
|
version: 11.3.2
|
||||||
repository: https://vmware-tanzu.github.io/helm-charts
|
repository: https://vmware-tanzu.github.io/helm-charts
|
||||||
Reference in New Issue
Block a user