X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=filesystem%2Fetc%2Fprometheus%2Frules%2Fiank.yml;h=47012ccbcb74b20f517f825d1cc327fd70fe2c0d;hb=c5021d1e8ad29f946b28d7a22d959e691e28bf32;hp=f64322b2c98ef4f48755daec9fe81f185b8c9488;hpb=b28eebdf9143aa17733f233b30b96f462008f3b6;p=distro-setup diff --git a/filesystem/etc/prometheus/rules/iank.yml b/filesystem/etc/prometheus/rules/iank.yml index f64322b..47012cc 100644 --- a/filesystem/etc/prometheus/rules/iank.yml +++ b/filesystem/etc/prometheus/rules/iank.yml @@ -199,7 +199,7 @@ groups: # 19 for 19 minutes, but I make it 18 just to give a bit of slack. - alert: historical_missing_metric expr: |- - count_over_time(up{job="prometheus"}[19m]) <= 18 unless on() present_over_time(ALERTS[19m]) unless on() time() - node_boot_time_seconds{instance="kdwg:9101"} <= 60 * 17 + count_over_time(up{job="prometheus"}[19m]) <= 18 unless on() present_over_time(ALERTS[19m]) unless on() time() - node_boot_time_seconds{instance="kd"} <= 60 * 17 labels: severity: warn @@ -274,7 +274,7 @@ groups: description: "A Prometheus job has disappeared\n VALUE = {{ $value }}" - alert: lowpri_target_down - expr: up{instance!~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100"} == 0 + expr: up{instance!~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100|kwwg:9101"} == 0 for: 30m labels: severity: warn