some fixes, and dns debugging on bk
[distro-setup] / mailtest-check
index 5cf79fe8278b8ae69532ee66fb17b60e49ac793a..cce5908fb78267d53c06a8bb224df0248b40daca 100755 (executable)
@@ -57,10 +57,11 @@ if [[ $1 == nonint ]]; then
 fi
 #### end arg processing ####
 
-
-if ! $int; then
-  sleep 60
-fi
+# We put this sleep in to avoid dns errors that happen on reboot,
+# but I want to debug them, so it is commented out for now.
+# if ! $int; then
+#   sleep 60
+# fi
 
 
 # TODO, get je to deliver the local mailbox: /m/md/INBOX
@@ -93,7 +94,31 @@ main() {
       folders=(/m/md/l/testignore)
       froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org iank@gnu.org)
       if ! $int; then
-        timeout 120 rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new
+        ### begin rsyncing fencepost email ###
+        # We don't want to exit if rsync fails; that will get caught by
+        # our later test by virtue of not having the latest email.
+        did_rsync=false
+        try_start_time=$EPOCHSECONDS
+        try_limit=140 # somewhat arbitrary value
+        while ! $did_rsync; do
+          try_left=$(( try_limit - ( EPOCHSECONDS - try_start_time) ))
+          timeout=120 # somewhat arbitrary value
+          if (( try_left < 0 )); then
+            break
+          fi
+          if (( try_left < timeout )); then
+            timeout=$try_left
+          fi
+          if timeout $timeout rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new; then
+            did_rsync=true
+          else
+            sleep 4
+          fi
+        done
+        if ! $did_rsync; then
+          echo mailtest-check: warning: fencepost rsync failed
+        fi
+        ### end rsyncing fencepost email ###
       fi
       ;;
   esac
@@ -120,6 +145,8 @@ EOF
   fi
   tmpfile=$(mktemp)
   declare -i unexpected=0
+  declare -i missing_dnswl=0
+  declare -i dnsfail=0
   for folder in ${folders[@]}; do
     for from in ${froms[@]}; do
       latest=
@@ -131,13 +158,15 @@ EOF
       fi
       # webmail sends them to cur it seems
       while read -r file; do
-        if [[ $file -nt $latest ]]; then
+        file_sec=$(awk '/^Subject: / {print $4}' $file)
+        if [[ $file_sec ]] && (( file_sec > last_sec )); then
           latest=$file
+          last_sec="$file_sec"
         fi
       done <$tmpfile
+      rm -f $tmpfile
 
       to=$(awk '/^Envelope-to: / {print $2}' $latest)
-      last_sec=$(awk '/^Subject: / {print $4}' $latest)
 
       if $slow; then
         if ! $int; then
@@ -220,7 +249,6 @@ EOF
             echo # ends our printf string buildup
             cat $resultfile
             echo mailtest-check: end of spam debug results
-
             # lets just handle 1 failure at a time in interactive mode.
             if $int; then
               echo mailtest-check: from: $from, to: $to
@@ -236,6 +264,31 @@ EOF
             #fi
           fi
           rm -f $resultfile
+          for r in ${results[@]}; do
+            case $r in
+              # iank: for when we want to handle dns errors differently
+              # DKIM_INVALID|T_SPF_TEMPERROR|T_SPF_HELO_TEMPERROR)
+              #   dnsfail+=1
+              #   ;;
+              *)
+                unexpected=$(( unexpected + 1 ))
+                ;;
+            esac
+          done
+          for miss in ${missing[@]}; do
+            # We expect dns failures from time to time, so
+            # we count them separately and alert differently.
+            case $miss in
+              # iank: dns fail
+              # DKIM_VALID|DKIM_VALID_AU|DKIM_VALID_EF|SPF_HELO_PASS|SPF_PASS|
+              RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH)
+                missing_dnswl+=1
+                ;;
+              *)
+                unexpected+=1
+                ;;
+            esac
+          done
         fi # if spamdpid
       fi # if $slow
 
@@ -247,11 +300,11 @@ EOF
       pr <<EOF
 mailtest_check_last_usec{folder="$folder",from="$from"} $last_sec
 EOF
-    done
-    unexpected=$(( unexpected + ${#results[@]} + ${#missing[@]} ))
-  done
+    done # end for from in ${froms[@]}
+  done # end for folder in ${folders[@]}
   if $slow; then
     pr <<EOF
+mailtest_check_missing_dnswl $missing_dnswl
 mailtest_check_unexpected_spamd_results $unexpected
 EOF
   fi
@@ -264,10 +317,13 @@ EOF
 }
 
 loop-main() {
+  # When running under systemd, the system just started. Be nice and
+  # give programs some time to finish their startup.
+  sleep 10
   while true; do
     premain_sec=$EPOCHSECONDS
     main
-    sleep $(( 300 - ( $EPOCHSECONDS - premain_sec ) ))
+    sleep $(( 300 - ( EPOCHSECONDS - premain_sec ) ))
   done
 }