Compare commits
20 Commits
b9a8f3fea8
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cc858dd8f3 | |||
|
5d71a0f199
|
|||
|
a4d2f870d9
|
|||
|
7136a0f322
|
|||
|
c2d6c0c8bb
|
|||
| f3c3741409 | |||
|
80b7cb2282
|
|||
| bf66dd0818 | |||
|
eea1c80a27
|
|||
|
612dd16d4b
|
|||
| 341b402f0e | |||
|
76eaa1dd98
|
|||
| a730f43cbd | |||
|
4bd23be552
|
|||
|
6cd4b20970
|
|||
|
c3c66cb9e3
|
|||
|
b0fb79f7ea
|
|||
|
624c5c7a8c
|
|||
|
ebf8f25342
|
|||
|
87c5d94e0d
|
@@ -2,7 +2,7 @@ version: "3.8"
|
||||
services:
|
||||
discovery:
|
||||
restart: unless-stopped
|
||||
image: ghcr.io/siderolabs/discovery-service:v1.0.11
|
||||
image: ghcr.io/siderolabs/discovery-service:v1.0.12
|
||||
ports:
|
||||
- 10.105.6.215:3000:3000
|
||||
- 10.105.6.215:3001:3001
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: argo-cd
|
||||
version: 9.1.4
|
||||
version: 9.1.5
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
@@ -171,7 +171,7 @@ resources: {}
|
||||
|
||||
serviceMonitor:
|
||||
# -- Specifies whether to create a ServiceMonitor resource for collecting Prometheus metrics
|
||||
enabled: false
|
||||
enabled: true
|
||||
|
||||
# -- namespace where you want to install ServiceMonitors
|
||||
namespace: ""
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: guestbook-ui
|
||||
namespace: guestbook-ui
|
||||
spec:
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: guestbook-ui
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: guestbook-ui
|
||||
spec:
|
||||
containers:
|
||||
- image: gcr.io/heptio-images/ks-guestbook-demo:0.2
|
||||
name: guestbook-ui
|
||||
ports:
|
||||
- containerPort: 80
|
||||
@@ -1,4 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: guestbook-ui
|
||||
@@ -1,11 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: guestbook-ui
|
||||
namespace: guestbook-ui
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: guestbook-ui
|
||||
@@ -1,354 +1,7 @@
|
||||
# Default values for metallb.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
imagePullSecrets: []
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
loadBalancerClass: ""
|
||||
|
||||
# To configure MetalLB, you must specify ONE of the following two
|
||||
# options.
|
||||
|
||||
rbac:
|
||||
# create specifies whether to install and use RBAC rules.
|
||||
create: true
|
||||
|
||||
metallb:
|
||||
prometheus:
|
||||
# scrape annotations specifies whether to add Prometheus metric
|
||||
# auto-collection annotations to pods. See
|
||||
# https://github.com/prometheus/prometheus/blob/release-2.1/documentation/examples/prometheus-kubernetes.yml
|
||||
# for a corresponding Prometheus configuration. Alternatively, you
|
||||
# may want to use the Prometheus Operator
|
||||
# (https://github.com/coreos/prometheus-operator) for more powerful
|
||||
# monitoring configuration. If you use the Prometheus operator, this
|
||||
# can be left at false.
|
||||
scrapeAnnotations: false
|
||||
|
||||
# port both controller and speaker will listen on for metrics
|
||||
metricsPort: 7472
|
||||
|
||||
# if set, enables rbac proxy on the controller and speaker to expose
|
||||
# the metrics via tls.
|
||||
# secureMetricsPort: 9120
|
||||
|
||||
# the name of the secret to be mounted in the speaker pod
|
||||
# to expose the metrics securely. If not present, a self signed
|
||||
# certificate to be used.
|
||||
speakerMetricsTLSSecret: ""
|
||||
|
||||
# the name of the secret to be mounted in the controller pod
|
||||
# to expose the metrics securely. If not present, a self signed
|
||||
# certificate to be used.
|
||||
controllerMetricsTLSSecret: ""
|
||||
|
||||
# prometheus doesn't have the permission to scrape all namespaces so we give it permission to scrape metallb's one
|
||||
rbacPrometheus: true
|
||||
|
||||
# the service account used by prometheus
|
||||
# required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
|
||||
serviceAccount: ""
|
||||
|
||||
# the namespace where prometheus is deployed
|
||||
# required when " .Values.prometheus.rbacPrometheus == true " and " .Values.prometheus.podMonitor.enabled=true or prometheus.serviceMonitor.enabled=true "
|
||||
namespace: ""
|
||||
|
||||
# the image to be used for the kuberbacproxy container
|
||||
rbacProxy:
|
||||
repository: gcr.io/kubebuilder/kube-rbac-proxy
|
||||
tag: v0.12.0
|
||||
pullPolicy:
|
||||
|
||||
# Prometheus Operator PodMonitors
|
||||
rbacPrometheus: false
|
||||
podMonitor:
|
||||
# enable support for Prometheus Operator
|
||||
enabled: false
|
||||
|
||||
# optional additional labels for podMonitors
|
||||
additionalLabels: {}
|
||||
|
||||
# optional annotations for podMonitors
|
||||
annotations: {}
|
||||
|
||||
# Job label for scrape target
|
||||
jobLabel: "app.kubernetes.io/name"
|
||||
|
||||
# Scrape interval. If not set, the Prometheus default scrape interval is used.
|
||||
interval:
|
||||
|
||||
# metric relabel configs to apply to samples before ingestion.
|
||||
metricRelabelings: []
|
||||
# - action: keep
|
||||
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
# sourceLabels: [__name__]
|
||||
|
||||
# relabel configs to apply to samples before ingestion.
|
||||
relabelings: []
|
||||
# - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
# separator: ;
|
||||
# regex: ^(.*)$
|
||||
# target_label: nodename
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
|
||||
# Prometheus Operator ServiceMonitors. To be used as an alternative
|
||||
# to podMonitor, supports secure metrics.
|
||||
serviceMonitor:
|
||||
# enable support for Prometheus Operator
|
||||
enabled: false
|
||||
|
||||
speaker:
|
||||
# optional additional labels for the speaker serviceMonitor
|
||||
additionalLabels: {}
|
||||
# optional additional annotations for the speaker serviceMonitor
|
||||
annotations: {}
|
||||
# optional tls configuration for the speaker serviceMonitor, in case
|
||||
# secure metrics are enabled.
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
|
||||
controller:
|
||||
# optional additional labels for the controller serviceMonitor
|
||||
additionalLabels: {}
|
||||
# optional additional annotations for the controller serviceMonitor
|
||||
annotations: {}
|
||||
# optional tls configuration for the controller serviceMonitor, in case
|
||||
# secure metrics are enabled.
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
|
||||
# Job label for scrape target
|
||||
jobLabel: "app.kubernetes.io/name"
|
||||
|
||||
# Scrape interval. If not set, the Prometheus default scrape interval is used.
|
||||
interval:
|
||||
|
||||
# metric relabel configs to apply to samples before ingestion.
|
||||
metricRelabelings: []
|
||||
# - action: keep
|
||||
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
# sourceLabels: [__name__]
|
||||
|
||||
# relabel configs to apply to samples before ingestion.
|
||||
relabelings: []
|
||||
# - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
# separator: ;
|
||||
# regex: ^(.*)$
|
||||
# target_label: nodename
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
|
||||
# Prometheus Operator alertmanager alerts
|
||||
enabled: true
|
||||
prometheusRule:
|
||||
# enable alertmanager alerts
|
||||
enabled: false
|
||||
|
||||
# optional additional labels for prometheusRules
|
||||
additionalLabels: {}
|
||||
|
||||
# optional annotations for prometheusRules
|
||||
annotations: {}
|
||||
|
||||
# MetalLBStaleConfig
|
||||
staleConfig:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
# MetalLBConfigNotLoaded
|
||||
configNotLoaded:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
# MetalLBAddressPoolExhausted
|
||||
addressPoolExhausted:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
addressPoolUsage:
|
||||
enabled: true
|
||||
thresholds:
|
||||
- percent: 75
|
||||
labels:
|
||||
severity: warning
|
||||
- percent: 85
|
||||
labels:
|
||||
severity: warning
|
||||
- percent: 95
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
# MetalLBBGPSessionDown
|
||||
bgpSessionDown:
|
||||
enabled: true
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
extraAlerts: []
|
||||
|
||||
# controller contains configuration specific to the MetalLB cluster
|
||||
# controller.
|
||||
controller:
|
||||
enabled: true
|
||||
# -- Controller log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
|
||||
logLevel: info
|
||||
# command: /controller
|
||||
# webhookMode: enabled
|
||||
|
||||
## @param controller.updateStrategy.type Metallb controller deployment strategy type.
|
||||
## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy
|
||||
## e.g:
|
||||
## strategy:
|
||||
## type: RollingUpdate
|
||||
## rollingUpdate:
|
||||
## maxSurge: 25%
|
||||
## maxUnavailable: 25%
|
||||
##
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
serviceAccount:
|
||||
# Specifies whether a ServiceAccount should be created
|
||||
create: true
|
||||
# The name of the ServiceAccount to use. If not set and create is
|
||||
# true, a name is generated using the fullname template
|
||||
name: ""
|
||||
annotations: {}
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
# nobody
|
||||
runAsUser: 65534
|
||||
fsGroup: 65534
|
||||
resources: {}
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 100Mi
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
priorityClassName: ""
|
||||
runtimeClassName: ""
|
||||
affinity: {}
|
||||
podAnnotations: {}
|
||||
labels: {}
|
||||
livenessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
tlsMinVersion: "VersionTLS12"
|
||||
tlsCipherSuites: ""
|
||||
|
||||
extraContainers: []
|
||||
|
||||
# speaker contains configuration specific to the MetalLB speaker
|
||||
# daemonset.
|
||||
speaker:
|
||||
enabled: true
|
||||
# command: /speaker
|
||||
# -- Speaker log level. Must be one of: `all`, `debug`, `info`, `warn`, `error` or `none`
|
||||
logLevel: info
|
||||
tolerateMaster: true
|
||||
memberlist:
|
||||
enabled: true
|
||||
mlBindPort: 7946
|
||||
mlBindAddrOverride: ""
|
||||
mlSecretKeyPath: "/etc/ml_secret_key"
|
||||
excludeInterfaces:
|
||||
enabled: true
|
||||
# ignore the exclude-from-external-loadbalancer label
|
||||
ignoreExcludeLB: false
|
||||
|
||||
## @param speaker.updateStrategy.type Speaker daemonset strategy type
|
||||
## ref: https://kubernetes.io/docs/tasks/manage-daemon/update-daemon-set/
|
||||
##
|
||||
updateStrategy:
|
||||
## StrategyType
|
||||
## Can be set to RollingUpdate or OnDelete
|
||||
##
|
||||
type: RollingUpdate
|
||||
serviceAccount:
|
||||
# Specifies whether a ServiceAccount should be created
|
||||
create: true
|
||||
# The name of the ServiceAccount to use. If not set and create is
|
||||
# true, a name is generated using the fullname template
|
||||
name: ""
|
||||
annotations: {}
|
||||
securityContext: {}
|
||||
## Defines a secret name for the controller to generate a memberlist encryption secret
|
||||
## By default secretName: {{ "metallb.fullname" }}-memberlist
|
||||
##
|
||||
# secretName:
|
||||
resources: {}
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 100Mi
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
priorityClassName: ""
|
||||
affinity: {}
|
||||
## Selects which runtime class will be used by the pod.
|
||||
runtimeClassName: ""
|
||||
podAnnotations: {}
|
||||
labels: {}
|
||||
livenessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
readinessProbe:
|
||||
enabled: true
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
startupProbe:
|
||||
enabled: true
|
||||
failureThreshold: 30
|
||||
periodSeconds: 5
|
||||
# frr contains configuration specific to the MetalLB FRR container,
|
||||
# for speaker running alongside FRR.
|
||||
frr:
|
||||
enabled: false
|
||||
metricsPort: 7473
|
||||
resources: {}
|
||||
|
||||
# if set, enables a rbac proxy sidecar container on the speaker to
|
||||
# expose the frr metrics via tls.
|
||||
# secureMetricsPort: 9121
|
||||
|
||||
|
||||
reloader:
|
||||
resources: {}
|
||||
|
||||
frrMetrics:
|
||||
resources: {}
|
||||
|
||||
extraContainers: []
|
||||
|
||||
crds:
|
||||
enabled: true
|
||||
validationFailurePolicy: Fail
|
||||
|
||||
# frrk8s contains the configuration related to using an frrk8s instance
|
||||
# (github.com/metallb/frr-k8s) as the backend for the BGP implementation.
|
||||
# This allows configuring additional frr parameters in combination to those
|
||||
# applied by MetalLB.
|
||||
frrk8s:
|
||||
# if set, enables frrk8s as a backend. This is mutually exclusive to frr
|
||||
# mode.
|
||||
enabled: false
|
||||
external: false
|
||||
namespace: ""
|
||||
|
||||
@@ -24,5 +24,5 @@ appVersion: "1.0"
|
||||
|
||||
dependencies:
|
||||
- name: kube-prometheus-stack
|
||||
version: 79.7.1
|
||||
version: 79.9.0
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
@@ -544,15 +544,6 @@ spec:
|
||||
labels:
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeDiskspaceWarning"
|
||||
annotations:
|
||||
description: "Mountpoint {{ "{{" }} $labels.mountpoint {{ "}}" }} on {{ "{{" }} $labels.nodename {{ "}}" }} will be full in less than 5 days based on the 48 hour trailing fill rate."
|
||||
summary: "Host filesystem free space is getting low"
|
||||
expr: "predict_linear(node_filesystem_free_bytes{device=~\"/.*\"}[2d], 3600 * 24 * 5) *on(instance) group_left(nodename) node_uname_info < 0"
|
||||
labels:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.8.4"
|
||||
severity: "warning"
|
||||
type: "ceph_default"
|
||||
- alert: "CephNodeInconsistentMTU"
|
||||
annotations:
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}."
|
||||
|
||||
@@ -20,8 +20,20 @@ traefik:
|
||||
prometheus:
|
||||
service:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
prometheusRule:
|
||||
enabled: true
|
||||
rules:
|
||||
- alert: TraefikDown
|
||||
expr: up{job="traefik"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
metrics_enabled: "true"
|
||||
context: traefik
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Traefik Down"
|
||||
description: "{{ $labels.pod }} on {{ $labels.nodename }} is down"
|
||||
deployment:
|
||||
kind: DaemonSet
|
||||
additionalContainers:
|
||||
|
||||
@@ -2,8 +2,48 @@ velero:
|
||||
backupsEnabled: true
|
||||
snapshotsEnabled: false
|
||||
metrics:
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
prometheusRule:
|
||||
enabled: true
|
||||
spec:
|
||||
- alert: VeleroBackupFailed
|
||||
annotations:
|
||||
message: Velero backup {{ $labels.schedule }} has failed
|
||||
expr: |-
|
||||
velero_backup_last_status{schedule!=""} != 1
|
||||
for: 15m
|
||||
labels:
|
||||
metrics_enabled: "true"
|
||||
severity: warning
|
||||
- alert: VeleroBackupFailing
|
||||
annotations:
|
||||
message: Velero backup {{ $labels.schedule }} has been failing for the last 12h
|
||||
expr: |-
|
||||
velero_backup_last_status{schedule!=""} != 1
|
||||
for: 12h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: VeleroNoNewBackup
|
||||
annotations:
|
||||
message: Velero backup {{ $labels.schedule }} has not run successfully in the last 25h
|
||||
expr: |-
|
||||
(
|
||||
(time() - velero_backup_last_successful_timestamp{schedule!=""}) >bool (25 * 3600)
|
||||
or
|
||||
absent(velero_backup_last_successful_timestamp{schedule!=""})
|
||||
) == 1
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: VeleroBackupPartialFailures
|
||||
annotations:
|
||||
message: Velero backup {{ $labels.schedule }} has {{ $value | humanizePercentage }} partialy failed backups
|
||||
expr: |-
|
||||
rate(velero_backup_partial_failure_total{schedule!=""}[25m])
|
||||
/ rate(velero_backup_attempt_total{schedule!=""}[25m]) > 0.5
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
configuration:
|
||||
backupStorageLocation:
|
||||
- name: weyma-truenas
|
||||
|
||||
Reference in New Issue
Block a user