mostly new music stuff

[distro-setup] / filesystem / etc / prometheus / rules / iank.yml
diff --git a/filesystem/etc/prometheus/rules/iank.yml b/filesystem/etc/prometheus/rules/iank.yml

index 651eb00de164a8a41b84fe53d6429a5821f8f3d9..f64322b2c98ef4f48755daec9fe81f185b8c9488 100644 (file)
--- a/filesystem/etc/prometheus/rules/iank.yml
+++ b/filesystem/etc/prometheus/rules/iank.yml
@@ -182,7 +182,7 @@ groups:
      labels:
        severity: day
      annotations:
-      summary: 'jr -u mailtest-check -e'
+      summary: 'jr -u mailtest-check -e -n 10000'
  
    - alert: mailtest_check_missing_dnswl
      expr: |-
@@ -191,7 +191,7 @@ groups:
      labels:
        severity: day
      annotations:
-      summary: 'jr -u mailtest-check -e'
+      summary: 'jr -u mailtest-check -e -n 10000'
  
    # We expect to be getting metrics, if we come up and notice we have
    # any missing in the past, and it wasn't from a reboot, and we haven't
@@ -203,13 +203,16 @@ groups:
      labels:
        severity: warn
  
-  - alert: 1pmtest
-    expr: hour() == 17 and minute() < 5
+  # 10 am Friday. The window opens 1 minute early so that the alert
+  # actually fires closer to 10 am.
+  - alert: dead_man_test
+    expr: |-
+      ( hour() == 13 and minute() >= 59 or hour() == 14 and minute() < 3 )  and day_of_week() == 5
      for: 0m
      labels:
        severity: daytest
      annotations:
-      summary: Prometheus daily test alert
+      summary: Prometheus weekly test alert
  
  
  #### Inhibit notes ####
@@ -278,6 +281,9 @@ groups:
      annotations:
        summary: Target down for 30m
  
+  # Note: PrometheusAllTargetsMissing is intentionally omitted because it
+  # is redundant with the above.
+
    - alert: target_down
      expr: up{instance=~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100"} == 0
      for: 5m
@@ -294,10 +300,6 @@ groups:
      annotations:
        summary: MAIL_HOST likely down for 5m
  
-
-# note, the next upstream metric is intentionally omitted:
-# https://github.com/samber/awesome-prometheus-alerts/issues/283
-
    - alert: PrometheusConfigurationReloadFailure
      expr: prometheus_config_last_reload_successful != 1
      for: 30m