3 - name: ansible managed alert rules
# NodeFilesystemAlmostOutOfSpace, 5% tier.
# Condition 1: available bytes as a percentage of filesystem size is below 5,
# restricted to job="node" series with a non-empty fstype.
# Condition 2: node_filesystem_readonly equals 0 for the same selector.
# NOTE(review): the operator joining the two conditions, and this rule's
# `for` duration and labels, are not visible in this excerpt. Presumably the
# conditions are joined with `and`, making `$value` in the description the
# percentage from condition 1 - confirm against the full file.
5 - alert: NodeFilesystemAlmostOutOfSpace
7 description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
8 only {{ printf "%.2f" $value }}% available space left.
9 summary: Filesystem has less than 5% space left.
12 node_filesystem_avail_bytes{job="node",fstype!=""} / node_filesystem_size_bytes{job="node",fstype!=""} * 100 < 5
14 node_filesystem_readonly{job="node",fstype!=""} == 0
# NodeFilesystemAlmostOutOfSpace, 3% tier (same alert name as the 5% rule,
# with a tighter threshold).
# Condition 1: available bytes as a percentage of filesystem size is below 3,
# restricted to job="node" series with a non-empty fstype.
# Condition 2: node_filesystem_readonly equals 0 for the same selector.
# NOTE(review): the joining operator, `for` duration and labels are not
# visible in this excerpt; presumably only the severity label distinguishes
# this rule from the 5% tier - confirm against the full file.
19 - alert: NodeFilesystemAlmostOutOfSpace
21 description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
22 only {{ printf "%.2f" $value }}% available space left.
23 summary: Filesystem has less than 3% space left.
26 node_filesystem_avail_bytes{job="node",fstype!=""} / node_filesystem_size_bytes{job="node",fstype!=""} * 100 < 3
28 node_filesystem_readonly{job="node",fstype!=""} == 0
# NodeFilesystemFilesFillingUp, 24-hour prediction tier.
# Condition 1: free inodes as a percentage of total inodes is below 40.
# Condition 2: a linear extrapolation of the last 6h of free-inode samples,
# projected 24*60*60 seconds (24 hours) ahead, is below 0.
# Condition 3: node_filesystem_readonly equals 0.
# All three use the selector job="node" with a non-empty fstype.
# NOTE(review): the operators joining the conditions, and the rule's `for`
# duration and labels, are not visible in this excerpt (presumably `and`) -
# confirm against the full file.
33 - alert: NodeFilesystemFilesFillingUp
35 description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
36 only {{ printf "%.2f" $value }}% available inodes left and is filling up.
37 summary: Filesystem is predicted to run out of inodes within the next 24 hours.
40 node_filesystem_files_free{job="node",fstype!=""} / node_filesystem_files{job="node",fstype!=""} * 100 < 40
42 predict_linear(node_filesystem_files_free{job="node",fstype!=""}[6h], 24*60*60) < 0
44 node_filesystem_readonly{job="node",fstype!=""} == 0
# NodeFilesystemFilesFillingUp, 4-hour prediction tier (same alert name as
# the 24-hour rule, with tighter thresholds).
# Condition 1: free inodes as a percentage of total inodes is below 20.
# Condition 2: a linear extrapolation of the last 6h of free-inode samples,
# projected 4*60*60 seconds (4 hours) ahead, is below 0.
# Condition 3: node_filesystem_readonly equals 0.
# All three use the selector job="node" with a non-empty fstype.
# NOTE(review): the operators joining the conditions, and the rule's `for`
# duration and labels, are not visible in this excerpt (presumably `and`) -
# confirm against the full file.
49 - alert: NodeFilesystemFilesFillingUp
51 description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
52 only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
53 summary: Filesystem is predicted to run out of inodes within the next 4 hours.
56 node_filesystem_files_free{job="node",fstype!=""} / node_filesystem_files{job="node",fstype!=""} * 100 < 20
58 predict_linear(node_filesystem_files_free{job="node",fstype!=""}[6h], 4*60*60) < 0
60 node_filesystem_readonly{job="node",fstype!=""} == 0
# NodeFilesystemAlmostOutOfFiles, 5% tier.
# Condition 1: free inodes as a percentage of total inodes is below 5,
# restricted to job="node" series with a non-empty fstype.
# Condition 2: node_filesystem_readonly equals 0 for the same selector.
# NOTE(review): the operator joining the two conditions, and the rule's
# `for` duration and labels, are not visible in this excerpt (presumably
# `and`) - confirm against the full file.
65 - alert: NodeFilesystemAlmostOutOfFiles
67 description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
68 only {{ printf "%.2f" $value }}% available inodes left.
69 summary: Filesystem has less than 5% inodes left.
72 node_filesystem_files_free{job="node",fstype!=""} / node_filesystem_files{job="node",fstype!=""} * 100 < 5
74 node_filesystem_readonly{job="node",fstype!=""} == 0
# NodeFilesystemAlmostOutOfFiles, 3% tier (same alert name as the 5% rule,
# with a tighter threshold).
# Condition 1: free inodes as a percentage of total inodes is below 3,
# restricted to job="node" series with a non-empty fstype.
# Condition 2: node_filesystem_readonly equals 0 for the same selector.
# NOTE(review): the operator joining the two conditions, and the rule's
# `for` duration and labels, are not visible in this excerpt (presumably
# `and`) - confirm against the full file.
79 - alert: NodeFilesystemAlmostOutOfFiles
81 description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
82 only {{ printf "%.2f" $value }}% available inodes left.
83 summary: Filesystem has less than 3% inodes left.
86 node_filesystem_files_free{job="node",fstype!=""} / node_filesystem_files{job="node",fstype!=""} * 100 < 3
88 node_filesystem_readonly{job="node",fstype!=""} == 0
# NodeNetworkReceiveErrs: matches series whose
# node_network_receive_errs_total counter increased by more than 10 over a
# 2-minute window. The expression has no label selector, so it is not
# limited to a particular job. `$value` in the description is that increase,
# printed with no decimals.
# NOTE(review): the rule's `for` duration and labels are not visible in this
# excerpt.
93 - alert: NodeNetworkReceiveErrs
95 description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
96 {{ printf "%.0f" $value }} receive errors in the last two minutes.'
97 summary: Network interface is reporting many receive errors.
99 increase(node_network_receive_errs_total[2m]) > 10
# NodeNetworkTransmitErrs: transmit-side counterpart of
# NodeNetworkReceiveErrs. Matches series whose
# node_network_transmit_errs_total counter increased by more than 10 over a
# 2-minute window. The expression has no label selector, so it is not
# limited to a particular job. `$value` in the description is that increase,
# printed with no decimals.
# NOTE(review): the rule's `for` duration and labels are not visible in this
# excerpt.
103 - alert: NodeNetworkTransmitErrs
105 description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
106 {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
107 summary: Network interface is reporting many transmit errors.
109 increase(node_network_transmit_errs_total[2m]) > 10
# NodeHighNumberConntrackEntriesUsed: matches when node_nf_conntrack_entries
# divided by node_nf_conntrack_entries_limit is greater than 0.75. `$value`
# is that ratio; the description renders it with humanizePercentage.
# NOTE(review): the rule's `for` duration and labels are not visible in this
# excerpt.
113 - alert: NodeHighNumberConntrackEntriesUsed
115 description: '{{ $value | humanizePercentage }} of conntrack entries are used'
116 summary: Number of conntrack entries is getting close to the limit
118 (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
# NodeClockSkewDetected: four visible conditions on node_timex_offset_seconds:
#   - offset greater than 0.05, paired with a 5m derivative >= 0
#   - offset less than -0.05, paired with a 5m derivative <= 0
# The message states the 0.05 s threshold that these comparisons use.
# NOTE(review): the operators and grouping that join the conditions are not
# visible in this excerpt. Presumably it is
# (offset > 0.05 and deriv >= 0) or (offset < -0.05 and deriv <= 0), i.e.
# the clock is off by more than 0.05 s in either direction and the offset is
# not shrinking - confirm against the full file. The `for` duration and
# labels are likewise not visible.
121 - alert: NodeClockSkewDetected
123 message: Clock on {{ $labels.instance }} is out of sync by more than 0.05s. Ensure
124 NTP is configured correctly on this host.
125 summary: Clock skew detected.
128 node_timex_offset_seconds > 0.05
130 deriv(node_timex_offset_seconds[5m]) >= 0
134 node_timex_offset_seconds < -0.05
136 deriv(node_timex_offset_seconds[5m]) <= 0
# NodeClockNotSynchronising: matches when the minimum value of
# node_timex_sync_status over the last 5 minutes equals 0 (presumably 0
# means the kernel reports the clock as unsynchronised - confirm).
# NOTE(review): the message sentence ends mid-phrase here because its
# continuation line is not visible in this excerpt; the rule's `for`
# duration and labels are not visible either.
141 - alert: NodeClockNotSynchronising
143 message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured
145 summary: Clock not synchronising.
147 min_over_time(node_timex_sync_status[5m]) == 0
152 expr: node_systemd_version >= 300
156 description: '{{ $labels.instance }} ianktest.'
157 summary: Instance {{ $labels.instance }} - ianktest