Skip to content
Snippets Groups Projects
Commit 7dc47ab9 authored by Pierre Krieger, committed by GitHub
Browse files

Add Prometheus alerts if unbounded channels are too large (#7866)

* Add Prometheus alerts if unbounded channels are too large

* Tweaks
parent 014e6f03
No related merge requests found
......@@ -147,3 +147,28 @@ groups:
message: 'Authority discovery on node {{ $labels.instance }} fails to
process more than 50 % of the values found on the DHT for more than 2
hours.'
# Warns when a channel's backlog stays at or above 200 items for 5 minutes.
- alert: UnboundedChannelPersistentlyLarge
  # Backlog = items sent minus items received, matched per channel by
  # ignoring the `action` label. The `or` branch falls back to the raw
  # "send" value for any channel that has no matching "received" series.
  # The fallback is matched with `ignoring(action)` rather than
  # `on(instance)`: with `on(instance)` every send-only channel of a node is
  # dropped as soon as any other channel on that node has both series, so a
  # channel that is never drained would go unnoticed.
  # NOTE(review): the threshold is inclusive (>= 200) while the message says
  # "more than 200" - confirm which one is intended.
  expr: |-
    (
      (polkadot_unbounded_channel_len{action = "send"} -
        ignoring(action) polkadot_unbounded_channel_len{action = "received"})
      or ignoring(action) polkadot_unbounded_channel_len{action = "send"}
    ) >= 200
  for: 5m
  labels:
    severity: warning
  annotations:
    message: >-
      Channel {{ $labels.entity }} on node {{ $labels.instance }} contains
      more than 200 items for more than 5 minutes. Node might be frozen.
# Warns as soon as a channel's backlog exceeds 5000 items (no `for:` delay).
- alert: UnboundedChannelVeryLarge
  # Backlog = items sent minus items received, matched per channel by
  # ignoring the `action` label. The `or` branch falls back to the raw
  # "send" value for any channel that has no matching "received" series.
  # The fallback is matched with `ignoring(action)` rather than
  # `on(instance)`: with `on(instance)` every send-only channel of a node is
  # dropped as soon as any other channel on that node has both series, so a
  # channel that is never drained would go unnoticed.
  expr: |-
    (
      (polkadot_unbounded_channel_len{action = "send"} -
        ignoring(action) polkadot_unbounded_channel_len{action = "received"})
      or ignoring(action) polkadot_unbounded_channel_len{action = "send"}
    ) > 5000
  labels:
    severity: warning
  annotations:
    message: >-
      Channel {{ $labels.entity }} on node {{ $labels.instance }} contains
      more than 5000 items.
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment