mainly alerting improvements
[distro-setup] / mailtest-check
index 363401780546c8849ded6acedf7242cd0adfbaec..e28d5ae966750233f5778c20c26a7c1d58be54a6 100755 (executable)
@@ -16,18 +16,27 @@ shopt -s nullglob
 
 e() { $int || return 0; printf "mailtest-check: %s\n" "$*"; }
 
-
-## Minutes before we give error.
-# We run this cronjob along with sending the test email every 5 minutes,
-# so give it 1 minute to arrive, then if the latest email is older than
-# 7 minutes, the last 2 haven't arrived in a reasonable amount of time.
-# However, when machines reboot things can get delayed, so add 10 mins,
-# not sure if that is a good number or not.
-min_limit=17
+# Set the global spamdpid to the MainPID systemd reports for the
+# spamassassin unit, or to the empty string when none is found.
+# A previously found pid is kept as long as /proc/$spamdpid still exists.
+getspamdpid() {
+  local attempt
+  if [[ $spamdpid && -d /proc/$spamdpid ]]; then
+    return 0
+  fi
+  # try twice in case we are restarting, it happens.
+  for attempt in 1 2; do
+    # sed blanks a MainPID of exactly 0 or 1 so it counts as "not found".
+    spamdpid=$(systemctl show --property MainPID --value spamassassin | sed 's/^[10]$//' ||:)
+    if [[ $spamdpid ]]; then
+      break
+    fi
+    # Only wait between attempts: sleeping after the last one cannot
+    # change the result and just delays the caller by 30 seconds.
+    if (( attempt < 2 )); then
+      sleep 30
+    fi
+  done
+}
+# Append stdin to this process's temporary metrics file
+# (mailtest-check.prom.$$) in the node-exporter directory.
+# Does nothing, and leaves stdin unread, when that directory is absent.
+pr() {
+  local promdir=/var/lib/prometheus/node-exporter
+  [[ -e $promdir ]] || return 0
+  cat >>"$promdir/mailtest-check.prom.$$"
+}
 
 
-# spamassassin checking takes about 8 seconds. only do that every
-# once in a while.
+#### begin arg processing ####
+# spamassassin checking takes about 8 seconds.
 slow=false
 if [[ $1 == slow ]]; then
   slow=true
@@ -46,89 +55,77 @@ fi
 if [[ $1 == nonint ]]; then
   int=false
 fi
+#### end arg processing ####
 
 
 if ! $int; then
   sleep 60
 fi
 
-# avoid errors like this:
-# Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/WLBLEval.pm: lib/Mail/SpamAssassin/Plugin/WLBLEval.pm: Permission denied at (eval 59) line 1.
-#Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/VBounce.pm: lib/Mail/SpamAssassin/Plugin/VBounce.pm: Permission denied at (eval 60) line 1.
-# i dont know why, i just found the solution online
-cd /m/md
+
 # TODO, get je to deliver the local mailbox: /m/md/INBOX
 # dovecot appears to setup, i can t be sure.
 
-case $HOSTNAME in
-  bk)
-    folders=(/m/md/{expertpathologyreview.com,amnimal.ninja}/testignore)
-    froms=(ian@iankelling.org z@zroe.org testignore@je.b8.nz iank@gnu.org)
-    ;;
-  je)
-    froms=(ian@iankelling.org z@zroe.org testignore@expertpathologyreview.com testignore@amnimal.ninja)
-    folders=(/m/md/je.b8.nz/testignore)
-    ;;
-  *)
-    folders=(/m/md/l/testignore)
-    froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org iank@gnu.org)
-    if ! $int; then
-      timeout 120 rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new
-    fi
-    ;;
-esac
 
-getspamdpid() {
-  if [[ ! $spamdpid || ! -d /proc/$spamdpid ]]; then
-    # try twice in case we are restarting, it happens.
-    for i in 1 2; do
-      spamdpid=$(systemctl show --property MainPID --value spamassassin | sed 's/^[10]$//' ||:)
-      if [[ $spamdpid ]]; then
-        break
+
+main() {
+
+  case $HOSTNAME in
+    bk)
+      folders=(/m/md/{expertpathologyreview.com,amnimal.ninja}/testignore)
+      froms=(ian@iankelling.org z@zroe.org testignore@je.b8.nz iank@gnu.org)
+      ;;
+    je)
+      froms=(ian@iankelling.org z@zroe.org testignore@expertpathologyreview.com testignore@amnimal.ninja)
+      folders=(/m/md/je.b8.nz/testignore)
+      ;;
+    *)
+      folders=(/m/md/l/testignore)
+      froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org iank@gnu.org)
+      if ! $int; then
+        timeout 120 rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new
       fi
-      sleep 30
-    done
-  fi
-}
-getspamdpid
-pr() {
+      ;;
+  esac
+
+
+  # avoid errors like this:
+  # Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/WLBLEval.pm: lib/Mail/SpamAssassin/Plugin/WLBLEval.pm: Permission denied at (eval 59) line 1.
+  #Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/VBounce.pm: lib/Mail/SpamAssassin/Plugin/VBounce.pm: Permission denied at (eval 60) line 1.
+  # i dont know why, i just found the solution online
+  cd /m/md
+
+
+  getspamdpid
+  # first time we write, overwrite anything existing
   if [[ -e /var/lib/prometheus/node-exporter ]]; then
-    cat >>/var/lib/prometheus/node-exporter/mailtest-check.prom.$$
-  fi
-}
-# first time we write, overwrite anything existing
-if [[ -e /var/lib/prometheus/node-exporter ]]; then
-  cat >/var/lib/prometheus/node-exporter/mailtest-check.prom.$$ <<EOF
+    cat >/var/lib/prometheus/node-exporter/mailtest-check.prom.$$ <<EOF
 mailtest_check_found_spamd_pid_bool $(( ${spamdpid:-0} > 0 ))
 EOF
-fi
-e spamdpid: $spamdpid
-if [[ ! $spamdpid ]]; then
-  echo $HOSTNAME mailtest spamd pid not found. systemctl status spamassassin:
-  systemctl status spamassassin
-fi
-tmpfile=$(mktemp)
-declare -i unexpected=0
-for folder in ${folders[@]}; do
-  for from in ${froms[@]}; do
-    latest=
-    last_sec=0
-
-    if ! grep -rlFx "From: $from" $folder/{new,cur} >$tmpfile; then
-      e "no message found from: $from"
-      continue
-    fi
-    # webmail sends them to cur it seems
-    while read -r file; do
-      if [[ $file -nt $latest ]]; then
-        latest=$file
+  fi
+  e spamdpid: $spamdpid
+  if [[ ! $spamdpid ]]; then
+    echo mailtest spamd pid not found. systemctl status spamassassin:
+    systemctl status spamassassin
+  fi
+  tmpfile=$(mktemp)
+  declare -i unexpected=0
+  for folder in ${folders[@]}; do
+    for from in ${froms[@]}; do
+      latest=
+      last_sec=0
+
+      if ! grep -rlFx "From: $from" $folder/{new,cur} >$tmpfile; then
+        echo "no message found from: $from"
+        continue
       fi
-    done <$tmpfile
+      # webmail sends them to cur it seems
+      while read -r file; do
+        if [[ $file -nt $latest ]]; then
+          latest=$file
+        fi
+      done <$tmpfile
 
-    if [[ ! $latest ]]; then
-      # 10 is an arbitrary bad value
-      unexpected+=10
-    else
       to=$(awk '/^Envelope-to: / {print $2}' $latest)
       last_sec=$(awk '/^Subject: / {print $4}' $latest)
 
@@ -205,37 +202,55 @@ for folder in ${folders[@]}; do
             if (( ${#missing[@]} )); then
               printf "missing %s" "${missing[*]}"
             fi
-            echo
-            echo mailtest-check: cat $latest:
-            cat $latest
-            echo mailtest-check: end of cat
-            printf "$(tput setaf 5 2>/dev/null ||:)█$(tput sgr0 2>/dev/null||:)%.0s" $(eval echo "{1..${COLUMNS:-60}}")
+            echo # ends our printf string buildup
+
+            if $int; then
+              # Interactive runs only: dump the offending message for inspection.
+              echo mailtest-check: cat "$latest":
+              cat "$latest"
+              echo mailtest-check: end of cat
+              # Separator bar: one █ per terminal column (60 when COLUMNS is
+              # unset), colored via tput when available. echo does not
+              # interpret a %.0s format, so build the bar by padding with
+              # spaces and replacing each one, then print it with printf.
+              local bar
+              bar=$(printf '%*s' "${COLUMNS:-60}" '')
+              printf '%s%s%s\n' "$(tput setaf 5 2>/dev/null ||:)" "${bar// /█}" "$(tput sgr0 2>/dev/null ||:)"
+            fi
           fi
         fi # if spamdpid
       fi # if $slow
-    fi # if [[ $latest ]]
 
-    now=$EPOCHSECONDS
-    limit=$(( now - 60 * min_limit ))
-    age_sec=$(( now - last_sec ))
-    e $((age_sec / 60)):$(( age_sec % 60 )) ago. to:$to from:$from $latest
+      now=$EPOCHSECONDS
+      age_sec=$(( now - last_sec ))
+      e $((age_sec / 60)):$(( age_sec % 60 )) ago. to:$to from:$from $latest
 
-    if (( last_sec <= limit )); then
-      echo $HOSTNAME mailtest $folder $from $(date -d @$last_sec +'%a %m-%d %H:%M')
-    fi
-    # usec = unix seconds
-    pr <<EOF
+      # usec = unix seconds
+      pr <<EOF
 mailtest_check_last_usec{folder="$folder",from="$from"} $last_sec
 EOF
+    done
+    unexpected=$(( unexpected + ${#results[@]} + ${#missing[@]} ))
   done
-done
-if $slow; then
-  pr <<EOF
+  if $slow; then
+    pr <<EOF
 mailtest_check_unexpected_spamd_results $unexpected
 EOF
-fi
+  fi
 
-dir=/var/lib/prometheus/node-exporter
-if [[ -e $dir  ]]; then
-  mv $dir/mailtest-check.prom.$$ $dir/mailtest-check.prom
+  dir=/var/lib/prometheus/node-exporter
+  if [[ -e $dir  ]]; then
+    mv $dir/mailtest-check.prom.$$ $dir/mailtest-check.prom
+    # note: node_textfile_mtime_seconds will tell us when this last happened. useful for debugging.
+  fi
+}
+
+# Run main repeatedly, aiming for one run every 300 seconds measured
+# from start to start. Never returns.
+loop-main() {
+  local premain_sec delay
+  while true; do
+    premain_sec=$EPOCHSECONDS
+    main
+    # main can take longer than the period (it sleeps while retrying the
+    # spamd pid lookup and may run an rsync with a 120s timeout). A
+    # negative operand makes sleep fail, so only sleep for what remains.
+    delay=$(( 300 - ( EPOCHSECONDS - premain_sec ) ))
+    if (( delay > 0 )); then
+      sleep "$delay"
+    fi
+  done
+}
+
+
+# Entry point: a non-empty INVOCATION_ID selects the endless loop,
+# otherwise do a single pass.
+# NOTE(review): INVOCATION_ID is presumably the variable systemd sets for
+# service units -- confirm.
+case $INVOCATION_ID in
+  '') main ;;
+  *) loop-main ;;
+esac
+
+exit 0