labels:
severity: day
annotations:
- summary: 'jr -u mailtest-check -e'
+ summary: 'jr -u mailtest-check -e -n 10000'
- alert: mailtest_check_missing_dnswl
expr: |-
labels:
severity: day
annotations:
- summary: 'jr -u mailtest-check -e'
+ summary: 'jr -u mailtest-check -e -n 10000'
# We expect to be getting metrics, if we come up and notice we have
# any missing in the past, and it wasn't from a reboot, and we haven't
labels:
severity: warn
- - alert: 1pmtest
- expr: hour() == 17 and minute() < 5
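+ # Fires at 10 am on Friday. Start matching 1 minute early so the alert
+ # actually fires closer to 10 am. PromQL hour() is evaluated in UTC, so
+ # hour() == 14 is 10 am only at UTC-4 (TODO: confirm behavior across DST).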
+ - alert: dead_man_test
+ expr: |-
+ ( hour() == 13 and minute() >= 59 or hour() == 14 and minute() < 3 ) and day_of_week() == 5
for: 0m
labels:
severity: daytest
annotations:
- summary: Prometheus daily test alert
+ summary: Prometheus weekly test alert
#### Inhibit notes ####
annotations:
summary: Target down for 30m
+ # Note: PrometheusAllTargetsMissing is intentionally omitted because it
+ # is redundant with the alert above.
+
- alert: target_down
expr: up{instance=~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100"} == 0
for: 5m
annotations:
summary: MAIL_HOST likely down for 5m
-
-# note, the next upstream metric is intentionally omitted:
-# https://github.com/samber/awesome-prometheus-alerts/issues/283
-
- alert: PrometheusConfigurationReloadFailure
expr: prometheus_config_last_reload_successful != 1
for: 30m