Skip to content

Commit

Permalink
[barbican] Update snmp-hsm alerts (#7368)
Browse files Browse the repository at this point in the history
Alert for device down or metric absence.
---------

Co-authored-by: rajiv <[email protected]>
  • Loading branch information
BerndKue and rajivmucheli authored Nov 7, 2024
1 parent dc3d887 commit 43e2476
Showing 1 changed file with 6 additions and 51 deletions.
57 changes: 6 additions & 51 deletions prometheus-exporters/snmp-exporter/alerts/snmp-hsm.alerts
Original file line number Diff line number Diff line change
Expand Up @@ -92,62 +92,17 @@ groups:
description: "HSM Client License in {{ $labels.devicename }} is {{ $value }}"
summary: "HSM Client License in {{ $labels.devicename }} is {{ $value }}"

# Info-level barbican/HSM alert named for the hsm01 B700 device.
- alert: hsm01B700IsUnReachable
  # Condition: the instantaneous per-second rate of snmp_hsm_hsmUpTime, taken
  # over a 5m window for "Luna G7" series whose devicename matches hsm01.*,
  # is non-zero. It must hold for 1h before the alert fires.
  # NOTE(review): a non-zero rate presumably means the uptime value is still
  # changing, which reads as inverted for an "UnReachable" alert (the A790
  # rules in this file compare with `< 0.95` instead) - confirm intent.
  expr: 'irate(snmp_hsm_hsmUpTime{hsmModel="Luna G7",devicename=~"hsm01.*"}[5m]) != 0'
  for: 1h
  labels:
    service: barbican
    support_group: foundation
    severity: info
    context: hsm
    dashboard: hsm
    meta: 'HSM01 B700 Device is UnReachable'
    # Kept as a string, not a boolean. Presumably read by the absence-alert
    # tooling - TODO confirm.
    no_alert_on_absence: 'true'
  annotations:
    summary: 'HSM01 B700 Device is UnReachable'
    description: 'HSM01 B700 Device is UnReachable'

# Info-level barbican/HSM alert named for the hsm02 B700 device.
- alert: hsm02B700IsUnReachable
  # Condition: the instantaneous per-second rate of snmp_hsm_hsmUpTime, taken
  # over a 5m window for "Luna G7" series whose devicename matches hsm02.*,
  # is non-zero. It must hold for 1h before the alert fires.
  # NOTE(review): a non-zero rate presumably means the uptime value is still
  # changing, which reads as inverted for an "UnReachable" alert (the A790
  # rules in this file compare with `< 0.95` instead) - confirm intent.
  expr: 'irate(snmp_hsm_hsmUpTime{hsmModel="Luna G7",devicename=~"hsm02.*"}[5m]) != 0'
  for: 1h
  labels:
    service: barbican
    support_group: foundation
    severity: info
    context: hsm
    dashboard: hsm
    meta: 'HSM02 B700 Device is UnReachable'
    # Kept as a string, not a boolean. Presumably read by the absence-alert
    # tooling - TODO confirm.
    no_alert_on_absence: 'true'
  annotations:
    summary: 'HSM02 B700 Device is UnReachable'
    description: 'HSM02 B700 Device is UnReachable'

# Info-level barbican/HSM alert named for the hsm01 A790 device.
- alert: hsm01A790IsUnReachable
  # Condition: the instantaneous per-second rate of snmp_hsm_hsmUpTime, taken
  # over a 5m window for "Luna K7" series whose devicename matches hsm01.*,
  # is below 0.95. It must hold for 15m before the alert fires.
  # NOTE(review): the 0.95 threshold presumably assumes uptime is reported in
  # seconds, so a healthy device advances at about 1 per second - confirm.
  # If the series is missing entirely, irate() yields no sample and this
  # expression cannot fire.
  expr: 'irate(snmp_hsm_hsmUpTime{hsmModel="Luna K7",devicename=~"hsm01.*"}[5m]) < 0.95'
  for: 15m
  labels:
    service: barbican
    support_group: foundation
    severity: info
    context: hsm
    dashboard: hsm
    meta: 'HSM01 A790 Device is UnReachable'
    # Kept as a string, not a boolean. Presumably read by the absence-alert
    # tooling - TODO confirm.
    no_alert_on_absence: 'true'
  annotations:
    summary: 'HSM01 A790 Device is UnReachable'
    description: 'HSM01 A790 Device is UnReachable'

# NOTE(review): this hunk shows both sides of the diff with the +/- markers
# stripped, so several keys appear twice. In each duplicated pair the first
# entry is presumably the removed line and the second the added line (this
# matches the "6 additions" reported in the commit header) - confirm against
# the raw diff before copying anything from here.
# Old rule header (hsm02 A790, uptime-rate based):
- alert: hsm02A790IsUnReachable
expr: irate(snmp_hsm_hsmUpTime{hsmModel="Luna K7",devicename=~"hsm02.*"}[5m]) < 0.95
# New generic rule: counts, per (name, device_type), the scrape targets of the
# snmp-exporter-hsm job whose `up` is 0, or the absent() marker when no `up`
# series exists for that job at all.
# NOTE(review): absent() only carries labels taken from equality matchers
# (here just `job`), so in the absence case `name` and `device_type` are empty
# and the templated texts below render without a device name - confirm this
# is acceptable.
- alert: hsmDeviceIsUnReachable
expr: count(up{job="scrapeConfig/infra-monitoring/snmp-exporter-hsm"} == 0 or absent(up{job="scrapeConfig/infra-monitoring/snmp-exporter-hsm"})) by (name, device_type)
# The condition must hold for 15 minutes before the alert fires.
for: 15m
labels:
# Severity appears as both info and warning (the two sides of the diff).
severity: info
severity: warning
service: barbican
context: hsm
# Static text next to the templated text built from the alert's labels.
meta: "HSM02 A790 Device is UnReachable"
meta: "{{ $labels.name }} {{ $labels.device_type }} Device is UnReachable"
dashboard: hsm
# Quoted so the value stays a string rather than a YAML boolean.
no_alert_on_absence: "true"
support_group: foundation
annotations:
# Static annotations followed by the templated ones.
description: "HSM02 A790 Device is UnReachable"
summary: "HSM02 A790 Device is UnReachable"
description: "{{ $labels.name }} {{ $labels.device_type }} Device is UnReachable"
summary: "{{ $labels.name }} {{ $labels.device_type }} Device is UnReachable"

0 comments on commit 43e2476

Please sign in to comment.