diff --git a/system-apps/rook-ceph/operator/templates/rules.yaml b/system-apps/rook-ceph/operator/templates/rules.yaml index bdd3d4a..bef98df 100644 --- a/system-apps/rook-ceph/operator/templates/rules.yaml +++ b/system-apps/rook-ceph/operator/templates/rules.yaml @@ -497,61 +497,6 @@ spec: oid: "1.3.6.1.4.1.50495.1.2.1.8.1" severity: "critical" type: "ceph_default" - - alert: "CephNodeNetworkPacketDrops" - annotations: - description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}." - summary: "One or more NICs reports packet drops" - expr: | - ( - rate(node_network_receive_drop_total{device!="lo"}[1m]) + - rate(node_network_transmit_drop_total{device!="lo"}[1m]) - ) / ( - rate(node_network_receive_packets_total{device!="lo"}[1m]) + - rate(node_network_transmit_packets_total{device!="lo"}[1m]) - ) >= 0.0050000000000000001 and ( - rate(node_network_receive_drop_total{device!="lo"}[1m]) + - rate(node_network_transmit_drop_total{device!="lo"}[1m]) - ) >= 10 - labels: - oid: "1.3.6.1.4.1.50495.1.2.1.8.2" - severity: "warning" - type: "ceph_default" - - alert: "CephNodeNetworkPacketErrors" - annotations: - description: "Node {{ "{{" }} $labels.instance {{ "}}" }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ "{{" }} $labels.device {{ "}}" }}." - summary: "One or more NICs reports packet errors" - expr: | - ( - rate(node_network_receive_errs_total{device!="lo"}[1m]) + - rate(node_network_transmit_errs_total{device!="lo"}[1m]) - ) / ( - rate(node_network_receive_packets_total{device!="lo"}[1m]) + - rate(node_network_transmit_packets_total{device!="lo"}[1m]) - ) >= 0.0001 or ( - rate(node_network_receive_errs_total{device!="lo"}[1m]) + - rate(node_network_transmit_errs_total{device!="lo"}[1m]) - ) >= 10 - labels: - oid: "1.3.6.1.4.1.50495.1.2.1.8.3" - severity: "warning" - type: "ceph_default" - - alert: "CephNodeNetworkBondDegraded" - annotations: - description: "Bond {{ "{{" }} $labels.master {{ "}}" }} is degraded on Node {{ "{{" }} $labels.instance {{ "}}" }}." - summary: "Degraded Bond on Node {{ "{{" }} $labels.instance {{ "}}" }}" - expr: | - node_bonding_slaves - node_bonding_active != 0 - labels: - severity: "warning" - type: "ceph_default" - - alert: "CephNodeInconsistentMTU" - annotations: - description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has a different MTU size ({{ "{{" }} $value {{ "}}" }}) than the median of devices named {{ "{{" }} $labels.device {{ "}}" }}." - summary: "MTU settings across Ceph hosts are inconsistent" - expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )" - labels: - severity: "warning" - type: "ceph_default" - name: "pools" rules: - alert: "CephPoolGrowthWarning"