Compare commits
1 Commit
main
...
9319202f35
| Author | SHA1 | Date | |
|---|---|---|---|
|
9319202f35
|
37
README.md
37
README.md
@@ -1,37 +0,0 @@
|
||||
# Main Infrastructure: weyma-talos
|
||||
|
||||
**Production Kubernetes infrastructure with disaster recovery capabilities**
|
||||
|
||||
This repository contains the foundational infrastructure for my Kubernetes homelab, designed with reliability and rapid recovery as core principles.
|
||||
|
||||
## Architecture
|
||||
|
||||
My infrastructure follows a layered "black start" approach: essential services run outside the Kubernetes cluster so that the cluster can be bootstrapped, and recovered after a total failure, without depending on itself.
|
||||
|
||||
### Black Start Layer
|
||||
Static services (Docker Compose on TrueNAS/Proxmox) that provide cluster dependencies:
|
||||
- Image cache for faster deployments and offline capability
|
||||
- Talos discovery server for node bootstrapping
|
||||
- HashiCorp Vault for secrets management (external to cluster)
|
||||
- Future: Self-hosted Sidero Omni server (migrating from SaaS)
|
||||
|
||||
### System Apps Layer
|
||||
Applications running within Kubernetes that provide core cluster functionality, managed via ArgoCD with GitOps principles.
|
||||
|
||||
## Repository Structure
|
||||
|
||||
- **`black-start/`** - Docker Compose services for cluster dependencies
|
||||
- **`config-patches/`** - Talos Linux configuration patches for cluster and individual machines
|
||||
- **`omni/`** - Sidero Omni [cluster template](https://docs.siderolabs.com/omni/reference/cluster-templates)
|
||||
- **`system-apps/`** - System applications (ArgoCD projects) - monitoring, ingress, certificates, storage
|
||||
|
||||
## Tech Stack
|
||||
|
||||
**OS:** Talos Linux | **Orchestration:** Kubernetes | **GitOps:** ArgoCD | **Secrets:** Vault | **Storage:** Rook-Ceph
|
||||
|
||||
## Recovery Process
|
||||
|
||||
The "black start" architecture enables automated recovery from a complete infrastructure failure in roughly 15-20 minutes:
|
||||
1. Start black-start services → 2. Bootstrap Talos → 3. Deploy system apps → 4. Deploy core apps
|
||||
|
||||
For application deployments, see [core-apps](https://git.dubyatp.xyz/core-apps).
|
||||
@@ -2,7 +2,7 @@ version: "3.8"
|
||||
services:
|
||||
discovery:
|
||||
restart: unless-stopped
|
||||
image: ghcr.io/siderolabs/discovery-service:v1.0.15
|
||||
image: ghcr.io/siderolabs/discovery-service:v1.0.13
|
||||
ports:
|
||||
- 10.105.6.215:3000:3000
|
||||
- 10.105.6.215:3001:3001
|
||||
@@ -52,7 +52,6 @@ patches:
|
||||
bind-address: 0.0.0.0
|
||||
proxy:
|
||||
extraArgs:
|
||||
proxy-mode: ipvs
|
||||
metrics-bind-address: 0.0.0.0:10249
|
||||
scheduler:
|
||||
extraArgs:
|
||||
@@ -288,45 +287,6 @@ patches:
|
||||
selector:
|
||||
k8s-app: metrics-server
|
||||
name: metrics-lb
|
||||
- contents: |-
|
||||
apiVersion: v1
|
||||
data:
|
||||
Corefile: |
|
||||
.:53 {
|
||||
errors
|
||||
health {
|
||||
lameduck 5s
|
||||
}
|
||||
ready
|
||||
log . {
|
||||
class error
|
||||
}
|
||||
prometheus :9153
|
||||
|
||||
kubernetes cluster.local in-addr.arpa ip6.arpa {
|
||||
pods insecure
|
||||
fallthrough in-addr.arpa ip6.arpa
|
||||
ttl 30
|
||||
}
|
||||
|
||||
rewrite name git.dubyatp.xyz traefik-local.traefik.svc.cluster.local
|
||||
|
||||
forward . /etc/resolv.conf {
|
||||
max_concurrent 1000
|
||||
}
|
||||
cache 30 {
|
||||
disable success cluster.local
|
||||
disable denial cluster.local
|
||||
}
|
||||
loop
|
||||
reload
|
||||
loadbalance
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: coredns
|
||||
namespace: kube-system
|
||||
name: coredns-config
|
||||
---
|
||||
kind: ControlPlane
|
||||
machines:
|
||||
|
||||
@@ -15,9 +15,10 @@
|
||||
],
|
||||
"packageRules": [
|
||||
{
|
||||
"description": "Consolidate patch and minor updates to one PR",
|
||||
"matchUpdateTypes": ["minor", "patch"],
|
||||
"groupName": "all-minor-patch-updates"
|
||||
"description": "Automerge patch updates",
|
||||
"matchUpdateTypes": ["patch"],
|
||||
"matchCurrentVersion": "!/^0/",
|
||||
"automerge": true
|
||||
},
|
||||
{
|
||||
"description": "Rook Ceph - auto-update minor and patch versions only",
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: argo-cd
|
||||
version: 9.4.7
|
||||
version: 9.3.7
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
@@ -128,30 +128,18 @@ argo-cd:
|
||||
remoteRef:
|
||||
key: argo-cd
|
||||
property: webhook.gitea.secret
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: admin.password
|
||||
remoteRef:
|
||||
key: argo-cd
|
||||
property: admin.password
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: admin.passwordMtime
|
||||
remoteRef:
|
||||
key: argo-cd
|
||||
property: admin.passwordMtime
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: dex.authentik.clientSecret
|
||||
remoteRef:
|
||||
key: argo-cd
|
||||
property: dex.authentik.clientSecret
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- apiVersion: external-secrets.io/v1
|
||||
kind: ExternalSecret
|
||||
metadata:
|
||||
@@ -172,23 +160,14 @@ argo-cd:
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: sshPrivateKey
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: type
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: type
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: url
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: url.core-apps
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- apiVersion: external-secrets.io/v1
|
||||
kind: ExternalSecret
|
||||
metadata:
|
||||
@@ -209,23 +188,14 @@ argo-cd:
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: sshPrivateKey
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: type
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: type
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: url
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: url.weyma-talos
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- apiVersion: external-secrets.io/v1
|
||||
kind: ExternalSecret
|
||||
metadata:
|
||||
@@ -246,23 +216,14 @@ argo-cd:
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: sshPrivateKey
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: type
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: type
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: url
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: url.williamp-sites
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- apiVersion: external-secrets.io/v1
|
||||
kind: ExternalSecret
|
||||
metadata:
|
||||
@@ -283,20 +244,11 @@ argo-cd:
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: sshPrivateKey
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: type
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: type
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
- secretKey: url
|
||||
remoteRef:
|
||||
key: argo-cd-git
|
||||
property: url.db-operators
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: cert-manager
|
||||
version: v1.19.4
|
||||
version: v1.19.2
|
||||
repository: https://charts.jetstack.io
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: external-secrets
|
||||
version: 2.0.1
|
||||
version: 1.3.1
|
||||
repository: https://charts.external-secrets.io
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: kubernetes-replicator
|
||||
version: 2.12.3
|
||||
version: 2.12.2
|
||||
repository: https://helm.mittwald.de
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: kube-prometheus-stack
|
||||
version: 82.8.0
|
||||
version: 81.4.1
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: rook-ceph
|
||||
version: v1.19.2
|
||||
version: v1.19.0
|
||||
repository: https://charts.rook.io/release
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: traefik
|
||||
version: 39.0.2
|
||||
version: 39.0.0
|
||||
repository: https://traefik.github.io/charts
|
||||
@@ -37,7 +37,7 @@ traefik:
|
||||
kind: DaemonSet
|
||||
additionalContainers:
|
||||
- name: cloudflared
|
||||
image: cloudflare/cloudflared:2026.2.0
|
||||
image: cloudflare/cloudflared:2026.1.2
|
||||
command:
|
||||
- cloudflared
|
||||
- tunnel
|
||||
@@ -129,26 +129,3 @@ traefik:
|
||||
data:
|
||||
tls.crt: ""
|
||||
tls.key: ""
|
||||
- apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: traefik-local
|
||||
spec:
|
||||
sessionAffinity: ClientIP
|
||||
sessionAffinityConfig:
|
||||
clientIP:
|
||||
timeoutSeconds: 3600
|
||||
selector:
|
||||
app.kubernetes.io/name: traefik
|
||||
app.kubernetes.io/instance: traefik-traefik
|
||||
ports:
|
||||
- name: gitssh
|
||||
port: 22
|
||||
targetPort: gitssh
|
||||
- name: web
|
||||
port: 80
|
||||
targetPort: web
|
||||
- name: websecure
|
||||
port: 443
|
||||
targetPort: websecure
|
||||
type: ClusterIP
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: velero
|
||||
version: 11.4.0
|
||||
version: 11.3.2
|
||||
repository: https://vmware-tanzu.github.io/helm-charts
|
||||
Reference in New Issue
Block a user