host info updates
[distro-setup] / mailtest-check
index 3d4d8a4cee7d86ef7af0e02e7b45fcbb38969236..856d774d32a82995fa93f31efdc17a46e9218ab5 100755 (executable)
@@ -1,14 +1,35 @@
 #!/bin/bash
+# I, Ian Kelling, follow the GNU license recommendations at
+# https://www.gnu.org/licenses/license-recommendations.en.html. They
+# recommend that small programs, < 300 lines, be licensed under the
+# Apache License 2.0. This file contains or is part of one or more small
+# programs. If a small program grows beyond 300 lines, I plan to switch
+# its license to GPL.
 
-# Usage: mail-test-check [slow] [anything]
+# Copyright 2024 Ian Kelling
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Usage: mailtest-check [slow] [int|nonint]
 #
 # slow: do slow checks, like spamassassin
 #
-# anything: consider non-interactive, dont print unless something went
+# nonint: non-interactive, don't print unless something went
 # wrong
 
 
-source /b/errhandle/err
+source /b/bash-bear-trap/bash-bear
 
 [[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"
 
@@ -16,18 +37,22 @@ shopt -s nullglob
 
 e() { $int || return 0; printf "mailtest-check: %s\n" "$*"; }
 
-
-## Minutes before we give error.
-# We run this cronjob along with sending the test email every 5 minutes,
-# so give it 1 minute to arrive, then if the latest email is older than
-# 7 minutes, the last 2 haven't arrived in a reasonable amount of time.
-# However, when machines reboot things can get delayed, so add 10 mins,
-# not sure if that is a good number or not.
-min_limit=17
+getspamdpid() {
+  if [[ ! $spamdpid || ! -d /proc/$spamdpid ]]; then
+    # Try twice, 30 seconds apart, in case spamassassin is mid-restart; it happens.
+    for (( i=0; i<2; i++ )); do
+      spamdpid=$(systemctl show --property MainPID --value spamassassin | sed 's/^[10]$//' ||:)
+      if [[ $spamdpid ]]; then
+        break
+      fi
+      sleep 30
+    done
+  fi
+}
 
 
-# spamassassin checking takes about 8 seconds. only do that every
-# once in a while.
+#### begin arg processing ####
+# spamassassin checking takes about 8 seconds.
 slow=false
 if [[ $1 == slow ]]; then
   slow=true
@@ -46,88 +71,121 @@ fi
 if [[ $1 == nonint ]]; then
   int=false
 fi
+#### end arg processing ####
 
+# We used to sleep here to avoid DNS errors that happen on reboot,
+# but it is disabled for now because I want to debug them.
+# if ! $int; then
+#   sleep 60
+# fi
 
-if ! $int; then
-  sleep 60
-fi
 
-# avoid errors like this:
-# Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/WLBLEval.pm: lib/Mail/SpamAssassin/Plugin/WLBLEval.pm: Permission denied at (eval 59) line 1.
-#Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/VBounce.pm: lib/Mail/SpamAssassin/Plugin/VBounce.pm: Permission denied at (eval 60) line 1.
-# i dont know why, i just found the solution online
-cd /m/md
 # TODO, get je to deliver the local mailbox: /m/md/INBOX
 # dovecot appears to setup, i can t be sure.
 
+source /a/bin/bash_unpublished/source-state
+
+doprom=false
 case $HOSTNAME in
-  bk)
-    folders=(/m/md/{expertpathologyreview.com,amnimal.ninja}/testignore)
-    froms=(ian@iankelling.org z@zroe.org testignore@je.b8.nz iank@gnu.org)
-    ;;
-  je)
-    froms=(ian@iankelling.org z@zroe.org testignore@expertpathologyreview.com testignore@amnimal.ninja)
-    folders=(/m/md/je.b8.nz/testignore)
+  $MAIL_HOST|bk|je)
+    doprom=true
     ;;
   *)
-    folders=(/m/md/l/testignore)
-    froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org iank@gnu.org)
-    if ! $int; then
-      timeout 120 rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new
-    fi
+    rm -f /var/lib/prometheus/node-exporter/mailtest-check.prom*
     ;;
 esac
 
-getspamdpid() {
-  if [[ ! $spamdpid || ! -d /proc/$spamdpid ]]; then
-    # try twice in case we are restarting, it happens.
-    for i in 1 2; do
-      spamdpid=$(systemctl show --property MainPID --value spamassassin | sed 's/^[10]$//' ||:)
-      if [[ $spamdpid ]]; then
-        break
+main() {
+
+  local -a p_unexpected_spamd_results p_missing_dnswl p_last_usec
+  case $HOSTNAME in
+    bk)
+      folders=(/m/md/{expertpathologyreview.com,amnimal.ninja}/testignore)
+      froms=(ian@iankelling.org z@zroe.org testignore@je.b8.nz iank@gnu.org)
+      ;;
+    je)
+      froms=(ian@iankelling.org z@zroe.org iank@gnu.org testignore@amnimal.ninja)
+      folders=(/m/md/je.b8.nz/testignore)
+      ;;
+    *)
+      folders=(/m/md/l/testignore)
+      froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org)
+      if ! $int; then
+        ### begin rsyncing fencepost email ###
+        # We don't want to exit if rsync fails; that will get caught by
+        # our later test by virtue of not having the latest email.
+        did_rsync=false
+        try_start_time=$EPOCHSECONDS
+        try_limit=140 # somewhat arbitrary value
+        while ! $did_rsync; do
+          try_left=$(( try_limit - ( EPOCHSECONDS - try_start_time) ))
+          timeout=120 # somewhat arbitrary value
+          if (( try_left < 0 )); then
+            echo "mailtest-check: failed to rsync fencepost > $try_limit seconds"
+            break
+          fi
+          if (( try_left < timeout )); then
+            timeout=$try_left
+          fi
+          if timeout $timeout rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new; then
+            did_rsync=true
+          else
+            sleep 4
+          fi
+        done
+        if ! $did_rsync; then
+          echo mailtest-check: warning: fencepost rsync failed
+        fi
+        ### end rsyncing fencepost email ###
       fi
-      sleep 30
-    done
-  fi
-}
-getspamdpid
-pr() {
+      ;;
+  esac
+
+
+  # avoid errors like this:
+  # Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/WLBLEval.pm: lib/Mail/SpamAssassin/Plugin/WLBLEval.pm: Permission denied at (eval 59) line 1.
+  #Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/VBounce.pm: lib/Mail/SpamAssassin/Plugin/VBounce.pm: Permission denied at (eval 60) line 1.
+  # i dont know why, i just found the solution online
+  cd /m/md
+
+
+  getspamdpid
+  # first time we write, overwrite anything existing
   if [[ -e /var/lib/prometheus/node-exporter ]]; then
-    cat >>/var/lib/prometheus/node-exporter/mailtest-check.prom.$$
-  fi
-}
-pr <<EOF
+    cat >/var/lib/prometheus/node-exporter/mailtest-check.prom.$$ <<EOF
 mailtest_check_found_spamd_pid_bool $(( ${spamdpid:-0} > 0 ))
 EOF
-e spamdpid: $spamdpid
-if [[ ! $spamdpid ]]; then
-  echo $HOSTNAME mailtest spamd pid not found. systemctl status spamassassin:
-  systemctl status spamassassin
-fi
-tmpfile=$(mktemp)
-declare -i unexpected=0
-for folder in ${folders[@]}; do
-  for from in ${froms[@]}; do
-    latest=
-    last_sec=0
-
-    if ! grep -rlFx "From: $from" $folder/{new,cur} >$tmpfile; then
-      e "no message found from: $from"
-      continue
-    fi
-    # webmail sends them to cur it seems
-    while read -r file; do
-      if [[ $file -nt $latest ]]; then
-        latest=$file
+  fi
+  e spamdpid: $spamdpid
+  if [[ ! $spamdpid ]]; then
+    echo mailtest spamd pid not found. systemctl status spamassassin:
+    systemctl status spamassassin
+  fi
+  tmpfile=$(mktemp)
+  declare -i unexpected=0
+  for folder in ${folders[@]}; do
+    for from in ${froms[@]}; do
+      declare -i missing_dnswl=0
+      #declare -i dnsfail=0
+      declare -i unexpected=0
+      latest=
+      last_sec=0
+
+      if ! grep -rlFx "From: $from" $folder/{new,cur} >$tmpfile; then
+        echo "no message found from: $from"
+        continue
       fi
-    done <$tmpfile
+      # webmail sends them to cur it seems
+      while read -r file; do
+        file_sec=$(awk '/^Subject: / {print $4}' $file)
+        if [[ $file_sec ]] && (( file_sec > last_sec )); then
+          latest=$file
+          last_sec="$file_sec"
+        fi
+      done <$tmpfile
+      rm -f $tmpfile
 
-    if [[ ! $latest ]]; then
-      # 10 is an arbitrary bad value
-      unexpected+=10
-    else
       to=$(awk '/^Envelope-to: / {print $2}' $latest)
-      last_sec=$(awk '/^Subject: / {print $4}' $latest)
 
       if $slow; then
         if ! $int; then
@@ -138,13 +196,18 @@ for folder in ${folders[@]}; do
           if [[ $(readlink /proc/$$/ns/net) != "$(readlink /proc/$spamdpid/ns/net)" ]]; then
             spamcpre="nsenter -t $spamdpid -n -m"
           fi
-
+          unset results
           declare -A results
           # pyzor fails for our test message, so dont put useless load on their
           # servers.
           # example line that sed is parsing:
           # (-0.1 / 5.0 requ) DKIM_SIGNED=0.1,DKIM_VALID=-0.1,DKIM_VALID_AU=-0.1,SPF_HELO_PASS=-0.001,SPF_PASS=-0.001,TVD_SPACE_RATIO=0.001 autolearn=_AUTOLEARN
-          raw_results="$($spamcpre sudo -u Debian-exim spamassassin -t --cf='score PYZOR_CHECK 0' <"$latest" | tail -n2 | head -n1 | sed -r 's/^\([^)]*\) *//;s/=[^, ]*([, ]|$)/ /g')"
+          resultfile=$(mktemp)
+          $spamcpre sudo -u Debian-exim spamassassin -D -t --cf='score PYZOR_CHECK 0' <"$latest" &>$resultfile
+
+          # Note: on some mail, it's 1 line after the send-test-forward line; on others it's 2, with a blank line in between.
+          # I use sed -n (printing only lines where the substitution matched) to filter this.
+          raw_results="$(tail $resultfile | grep -A2 -Fx /usr/local/bin/send-test-forward | tail -n+2 | sed -nr 's/^\([^)]*\) *//;s/=[^, ]*([, ]|$)/ /gp')"
           for r in $raw_results; do
             case $r in
               # got this in an update 2022-01. dun care
@@ -153,6 +216,10 @@ for folder in ${folders[@]}; do
               # it seems like some versions of spamassassin do BODY_SINGLE_WORD, others dont, we dun care.
               # bayes_00 is a new one indicating ham, we dont care if its missing.
               BAYES_00|BODY_SINGLE_WORD|FROM_FMBLA_NEWDOM*|autolearn) : ;;
+
+              # These have somewhat randomly been added and removed, resulting in useless alerts, so ignore them.
+              RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH) : ;;
+
               SPF_HELO_NEUTRAL)
                 # some of my domains use neutral spf, treat them the same.
                 results[SPF_HELO_PASS]=t
@@ -169,12 +236,7 @@ for folder in ${folders[@]}; do
           keys=(DKIM_SIGNED DKIM_VALID{,_AU,_EF} SPF_HELO_PASS SPF_PASS TVD_SPACE_RATIO)
           if [[ $to == *@gnu.org && $from == *@gnu.org ]]; then
             keys=(ALL_TRUSTED TVD_SPACE_RATIO)
-          elif [[ $to == *@gnu.org ]]; then
-            # eggs has RCVD_IN_DNSWL_MED
-            keys+=(RCVD_IN_DNSWL_MED)
-          elif [[ $from == *@gnu.org ]]; then
-            # eggs has these
-            keys+=(RCVD_IN_DNSWL_MED DKIMWL_WL_HIGH)
+            # Mail from eggs had DKIMWL_WL_HIGH sometime in 2022, then DKIMWL_WL_MED until March 2023.
           fi
 
           for t in  ${keys[@]}; do
@@ -195,44 +257,105 @@ for folder in ${folders[@]}; do
             fi
           done
           if (( ${#results[@]} || ${#missing[@]} )); then
-            printf "$HOSTNAME spamtest %s/%s\n" "$latest"
+            printf "$HOSTNAME spamtest %s\n" "$latest"
             if (( ${#results[@]} )); then
               printf "unexpected %s" "${!results[*]} "
             fi
             if (( ${#missing[@]} )); then
               printf "missing %s" "${missing[*]}"
             fi
-            echo
-            echo mailtest-check: cat $latest:
-            cat $latest
-            echo mailtest-check: end of cat
-            printf "$(tput setaf 5 2>/dev/null ||:)█$(tput sgr0 2>/dev/null||:)%.0s" $(eval echo "{1..${COLUMNS:-60}}")
+            echo # ends our printf string buildup
+            cat $resultfile
+            echo mailtest-check: end of spam debug results
+            # Let's just handle 1 failure at a time in interactive mode.
+            if $int; then
+              echo mailtest-check: from: $from, to: $to
+              exit 0
+            fi
+
+            # less verbose debug output, commented since I might want it another time.
+            # if $int; then
+            #   echo mailtest-check: cat $latest:
+            #   cat $latest
+            #   echo mailtest-check: end of cat
+            #fi
           fi
+          rm -f $resultfile
+          for r in ${results[@]}; do
+            case $r in
+              # iank: for when we want to handle dns errors differently.
+              # also uncomment declaration of dnsfail above.
+              # DKIM_INVALID|T_SPF_TEMPERROR|T_SPF_HELO_TEMPERROR)
+              #   dnsfail+=1
+              #   ;;
+              *)
+                unexpected=$(( unexpected + 1 ))
+                ;;
+            esac
+          done
+          for miss in ${missing[@]}; do
+            # At some point we had annoying dns failures that we couldn't solve, so
+            # we counted dns-failure-related results separately and alerted differently.
+            # DKIM_VALID|DKIM_VALID_AU|DKIM_VALID_EF|SPF_HELO_PASS|SPF_PASS|
+            case $miss in
+              *)
+                unexpected+=1
+                ;;
+            esac
+          done
+          mapfile -O ${#p_missing_dnswl[@]} -t p_missing_dnswl <<EOF
+mailtest_check_missing_dnswl{folder="$folder",from="$from"} $missing_dnswl
+EOF
+          mapfile -O ${#p_unexpected_spamd_results[@]} -t p_unexpected_spamd_results <<EOF
+mailtest_check_unexpected_spamd_results{folder="$folder",from="$from"} $unexpected
+EOF
         fi # if spamdpid
       fi # if $slow
-    fi # if [[ $latest ]]
-
-    now=$(date +%s)
-    limit=$(( now - 60 * min_limit ))
-    age_sec=$(( now - last_sec ))
-    e $((age_sec / 60)):$(( age_sec % 60 )) ago. to:$to from:$from $latest
-
-    if (( last_sec <= limit )); then
-      echo $HOSTNAME mailtest $folder $from $(date -d @$last_sec +'%a %m-%d %H:%M')
-    fi
-    # usec = unix seconds
-    pr <<EOF
+
+      now=$EPOCHSECONDS
+      age_sec=$(( now - last_sec ))
+      e $((age_sec / 60)):$(( age_sec % 60 )) ago. to:$to from:$from $latest
+
+      # usec = unix seconds
+      mapfile -O ${#p_last_usec[@]} -t p_last_usec <<EOF
 mailtest_check_last_usec{folder="$folder",from="$from"} $last_sec
 EOF
+    done # end for from in ${froms[@]}
+  done # end for folder in ${folders[@]}
+
+  dir=/var/lib/prometheus/node-exporter
+  path=$dir/mailtest-check.prom.$$
+  if $doprom && [[ -e $dir  ]]; then
+    for l in "${p_unexpected_spamd_results[@]}"; do
+      printf "%s\n" "$l" >>$path
+      done
+    for l in "${p_missing_dnswl[@]}"; do
+      printf "%s\n" "$l" >>$path
+    done
+    for l in "${p_last_usec[@]}"; do
+      printf "%s\n" "$l" >>$path
+    done
+    mv $path $dir/mailtest-check.prom
+    # note: node_textfile_mtime_seconds will tell us when this last happened. useful for debugging.
+  fi
+}
+
+loop-main() {
+  # When running under systemd, the system just started. Be nice and
+  # give programs some time to finish their startup.
+  sleep 10
+  while true; do
+    premain_sec=$EPOCHSECONDS
+    main
+    sleep $(( 300 - ( EPOCHSECONDS - premain_sec ) ))
   done
-done
-if $slow; then
-  pr <<EOF
-mailtest_check_unexpected_spamd_results $unexpected
-EOF
-fi
+}
 
-dir=/var/lib/prometheus/node-exporter
-if [[ -e $dir  ]]; then
-  mv $dir/mailtest-check.prom.$$ $dir/mailtest-check.prom
+
+if [[ $INVOCATION_ID ]]; then
+  loop-main
+else
+  main
 fi
+
+exit 0