# 19 for 19 minutes, but I make it 18 just to give a bit of slack.
- alert: historical_missing_metric
expr: |-
- count_over_time(up{job="prometheus"}[19m]) <= 18 unless on() present_over_time(ALERTS[19m]) unless on() time() - node_boot_time_seconds{instance="kdwg:9101"} <= 60 * 17
+ count_over_time(up{job="prometheus"}[19m]) <= 18 unless on() present_over_time(ALERTS[19m]) unless on() time() - node_boot_time_seconds{instance="kd"} <= 60 * 17
labels:
severity: warn
description: "A Prometheus job has disappeared\n VALUE = {{ $value }}"
- alert: lowpri_target_down
- expr: up{instance!~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100"} == 0
+ expr: up{instance!~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100|kwwg:9101"} == 0
for: 30m
labels:
severity: warn