From 6f2603d3a0f9e5a975aacbb0de87393c98bb41f4 Mon Sep 17 00:00:00 2001 From: William P Date: Tue, 30 Dec 2025 23:50:18 -0500 Subject: [PATCH] remove redundant node alerts --- .../rook-ceph/operator/templates/rules.yaml | 55 ------------------- 1 file changed, 55 deletions(-) diff --git a/system-apps/rook-ceph/operator/templates/rules.yaml b/system-apps/rook-ceph/operator/templates/rules.yaml index bdd3d4a..bef98df 100644 --- a/system-apps/rook-ceph/operator/templates/rules.yaml +++ b/system-apps/rook-ceph/operator/templates/rules.yaml @@ -497,61 +497,6 @@ spec: oid: "1.3.6.1.4.1.50495.1.2.1.8.1" severity: "critical" type: "ceph_default" - - alert: "CephNodeNetworkPacketDrops" - annotations: - description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}." - summary: "One or more NICs reports packet drops" - expr: | - ( - rate(node_network_receive_drop_total{device!="lo"}[1m]) + - rate(node_network_transmit_drop_total{device!="lo"}[1m]) - ) / ( - rate(node_network_receive_packets_total{device!="lo"}[1m]) + - rate(node_network_transmit_packets_total{device!="lo"}[1m]) - ) >= 0.0050000000000000001 and ( - rate(node_network_receive_drop_total{device!="lo"}[1m]) + - rate(node_network_transmit_drop_total{device!="lo"}[1m]) - ) >= 10 - labels: - oid: "1.3.6.1.4.1.50495.1.2.1.8.2" - severity: "warning" - type: "ceph_default" - - alert: "CephNodeNetworkPacketErrors" - annotations: - description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}." - summary: "One or more NICs reports packet errors" - expr: | - ( - rate(node_network_receive_errs_total{device!="lo"}[1m]) + - rate(node_network_transmit_errs_total{device!="lo"}[1m]) - ) / ( - rate(node_network_receive_packets_total{device!="lo"}[1m]) + - rate(node_network_transmit_packets_total{device!="lo"}[1m]) - ) >= 0.0001 or ( - rate(node_network_receive_errs_total{device!="lo"}[1m]) + - rate(node_network_transmit_errs_total{device!="lo"}[1m]) - ) >= 10 - labels: - oid: "1.3.6.1.4.1.50495.1.2.1.8.3" - severity: "warning" - type: "ceph_default" - - alert: "CephNodeNetworkBondDegraded" - annotations: - description: "Bond {{ "{{" }} $labels.master {{ "}}" }} is degraded on Node {{ "{{" }} $labels.instance {{ "}}" }}." - summary: "Degraded Bond on Node {{ "{{" }} $labels.instance {{ "}}" }}" - expr: | - node_bonding_slaves - node_bonding_active != 0 - labels: - severity: "warning" - type: "ceph_default" - - alert: "CephNodeInconsistentMTU" - annotations: - description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}." - summary: "MTU settings across Ceph hosts are inconsistent" - expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )" - labels: - severity: "warning" - type: "ceph_default" - name: "pools" rules: - alert: "CephPoolGrowthWarning"