From a8edbc2428b7f1fa6b2e800ffb388d5b2aad41ea Mon Sep 17 00:00:00 2001
From: Chris Grindstaff
Date: Thu, 3 Aug 2023 02:47:28 -0400
Subject: [PATCH] feat: include lun offline ems alert (#2252)

---
Review notes (text between the "---" line and the diff is ignored by git am):

 * Adds a "LUN Offline" alerting rule. It fires when the most recent
   ems_events{message="LUN.offline"} sample within the last 5 minutes
   equals 1.
 * The severity label is templated from the incoming severity label:
     alert, emergency        -> critical
     error                   -> warning
     notice, informational   -> info
     any other non-empty     -> passed through unchanged
     empty / missing         -> the template renders an empty string
 * The value uses a folded block scalar (">"); every template action
   carries "{{-" / "-}}" trim markers, so the layout whitespace and line
   breaks between actions are stripped from the rendered label value.
 * NOTE(review): this copy of the patch was recovered from a
   whitespace-collapsed source. The leading indentation of the hunk lines
   below is reconstructed using conventional 2-space YAML nesting; verify
   it against the target file, or apply with
   `git apply --ignore-whitespace`, before relying on it.

 container/prometheus/ems_alert_rules.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/container/prometheus/ems_alert_rules.yml b/container/prometheus/ems_alert_rules.yml
index 5ee20919d..29fb3253e 100644
--- a/container/prometheus/ems_alert_rules.yml
+++ b/container/prometheus/ems_alert_rules.yml
@@ -25,6 +25,28 @@ groups:
         annotations:
           summary: "LUN {{ $labels.lun_path }}, vol {{ $labels.volume_name }} (DSID {{ $labels.volume_dsid }}) destroyed (UUID: {{ $labels.object_uuid }})."
 
+      - alert: LUN Offline
+        expr: last_over_time(ems_events{message="LUN.offline"}[5m]) == 1
+        labels:
+          severity: >
+            {{- if $labels.severity -}}
+              {{- if eq $labels.severity "alert" -}}
+                critical
+              {{- else if eq $labels.severity "error" -}}
+                warning
+              {{- else if eq $labels.severity "emergency" -}}
+                critical
+              {{- else if eq $labels.severity "notice" -}}
+                info
+              {{- else if eq $labels.severity "informational" -}}
+                info
+              {{- else -}}
+                {{ $labels.severity }}
+              {{- end -}}
+            {{- end -}}
+        annotations:
+          summary: "LUN {{ $labels.lun_path }}, vol {{ $labels.volume_name }} (DSID {{ $labels.volume_dsid }}) was brought offline (UUID: {{ $labels.object_uuid }})."
+
       - alert: NVMe Namespace Destroyed
         expr: last_over_time(ems_events{message="NVMeNS.destroy"}[5m]) == 1
         labels: