host info updates
[distro-setup] / mailtest-check
1 #!/bin/bash
2 # I, Ian Kelling, follow the GNU license recommendations at
3 # https://www.gnu.org/licenses/license-recommendations.en.html. They
4 # recommend that small programs, < 300 lines, be licensed under the
5 # Apache License 2.0. This file contains or is part of one or more small
6 # programs. If a small program grows beyond 300 lines, I plan to switch
7 # its license to GPL.
8
9 # Copyright 2024 Ian Kelling
10
11 # Licensed under the Apache License, Version 2.0 (the "License");
12 # you may not use this file except in compliance with the License.
13 # You may obtain a copy of the License at
14
15 # http://www.apache.org/licenses/LICENSE-2.0
16
17 # Unless required by applicable law or agreed to in writing, software
18 # distributed under the License is distributed on an "AS IS" BASIS,
19 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 # See the License for the specific language governing permissions and
21 # limitations under the License.
22
23
# Usage: mailtest-check [slow] [int|nonint]
25 #
26 # slow: do slow checks, like spamassassin
27 #
28 # for non-interactive, dont print unless something went
29 # wrong
30
31
# Load the bash-bear library (presumably installs error handling/traps for
# the rest of the script -- confirm against bash-bear itself).
source /b/bash-bear-trap/bash-bear

# Everything below needs root: when we are not root, replace this process
# with a sudo invocation of ourselves, keeping the environment (-E) and
# the original arguments.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

# Globs that match nothing expand to nothing instead of to themselves.
shopt -s nullglob
37
# Print an informational message prefixed with the program name.
# Only prints in interactive mode (global $int is true); otherwise it is a
# silent no-op that returns success.
# Arguments: the words of the message, joined with spaces.
e() {
  if ! $int; then
    return 0
  fi
  printf "mailtest-check: %s\n" "$*"
}
39
# Populate the global spamdpid with the main pid of the spamassassin
# systemd service.
#
# If spamdpid already names a process that exists under /proc, nothing is
# done. Otherwise systemd is asked for the service's MainPID, up to two
# times with a 30 second pause between the attempts (the service may be in
# the middle of a restart). spamdpid is left empty when no pid was found.
#
# Globals: spamdpid (read and written)
# Returns: 0 always
getspamdpid() {
  local attempt

  if [[ $spamdpid && -d /proc/$spamdpid ]]; then
    return 0
  fi

  # try twice in case we are restarting, it happens.
  for attempt in 1 2; do
    # sed blanks out a pid of exactly 0 or 1: those are treated as "not found".
    spamdpid=$(systemctl show --property MainPID --value spamassassin | sed 's/^[10]$//' ||:)
    if [[ $spamdpid ]]; then
      break
    fi
    # Only wait when another attempt follows; sleeping after the last
    # attempt would just delay the caller.
    if (( attempt < 2 )); then
      sleep 30
    fi
  done
}
52
53
#### begin arg processing ####
# slow: also run the slow checks. spamassassin checking takes about 8 seconds.
# int:  interactive mode, which controls whether e() prints anything.
slow=false
int=false

# An optional leading "slow" argument is consumed here.
if [[ $1 == slow ]]; then
  slow=true
  shift
fi

# Default: interactive when we came in through sudo or over ssh.
if [[ $SUDO_USER || $SSH_CONNECTION ]]; then
  int=true
fi

# An explicit int/nonint argument overrides the default.
case $1 in
  int) int=true ;;
  nonint) int=false ;;
esac
#### end arg processing ####
75
76 # we put this in to avoid dns errors that happen on reboot,
77 # but I want to debug them.
78 # if ! $int; then
79 # sleep 60
80 # fi
81
82
# TODO: get je to deliver to the local mailbox: /m/md/INBOX
# dovecot appears to be set up, but I can't be sure.
85
86 source /a/bin/bash_unpublished/source-state
87
# doprom: whether this host publishes prometheus metrics for this check.
# Only $MAIL_HOST, bk and je do; on every other host, remove any
# mailtest-check metric files that may be left in the node-exporter
# textfile directory.
case $HOSTNAME in
  $MAIL_HOST | bk | je)
    doprom=true
    ;;
  *)
    doprom=false
    rm -f /var/lib/prometheus/node-exporter/mailtest-check.prom*
    ;;
esac
97
# Run one complete check pass.
#
# For every (folder, from) pair configured for this host, find the newest
# test message from that sender (newest = largest number in the 4th field
# of its Subject: line). In slow mode, also run that message through
# spamassassin and compare the rule names it reports with the expected
# set. Results are collected as prometheus metric lines and, on hosts
# where doprom is true, published to the node-exporter textfile directory.
#
# Globals read: HOSTNAME int slow doprom spamdpid (the latter through
#   getspamdpid)
# Side effects: cds to /m/md; on hosts other than bk/je in non-interactive
#   mode, rsyncs mail from fencepost; in slow non-interactive mode, deletes
#   test mail older than 1080 minutes; in interactive mode, exits the
#   script (status 0) at the first spamassassin mismatch.
main() {
  local -a p_unexpected_spamd_results p_missing_dnswl p_last_usec
  local -a folders froms missing keys
  local -i unexpected missing_dnswl
  local prom_dir=/var/lib/prometheus/node-exporter
  local prom_tmp=$prom_dir/mailtest-check.prom.$$
  local folder from file file_sec latest last_sec to tmpfile resultfile
  local raw_results spamcpre r t miss l now age_sec
  local did_rsync try_start_time try_limit try_left timeout

  case $HOSTNAME in
    bk)
      folders=(/m/md/{expertpathologyreview.com,amnimal.ninja}/testignore)
      froms=(ian@iankelling.org z@zroe.org testignore@je.b8.nz iank@gnu.org)
      ;;
    je)
      froms=(ian@iankelling.org z@zroe.org iank@gnu.org testignore@amnimal.ninja)
      folders=(/m/md/je.b8.nz/testignore)
      ;;
    *)
      folders=(/m/md/l/testignore)
      froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org)
      if ! $int; then
        ### begin rsyncing fencepost email ###
        # We dont want to exit if rsync fails, that will get caught by
        # our later test by virtue of not having the latest email.
        did_rsync=false
        try_start_time=$EPOCHSECONDS
        try_limit=140 # somewhat arbitrary value
        while ! $did_rsync; do
          try_left=$(( try_limit - ( EPOCHSECONDS - try_start_time) ))
          timeout=120 # somewhat arbitrary value
          # <= rather than <: a duration of 0 would tell timeout(1) to
          # run rsync with no time limit at all.
          if (( try_left <= 0 )); then
            echo "mailtest-check: failed to rsync fencepost > $try_limit seconds"
            break
          fi
          if (( try_left < timeout )); then
            timeout=$try_left
          fi
          if timeout "$timeout" rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new; then
            did_rsync=true
          else
            sleep 4
          fi
        done
        if ! $did_rsync; then
          echo mailtest-check: warning: fencepost rsync failed
        fi
        ### end rsyncing fencepost email ###
      fi
      ;;
  esac

  # avoid errors like this:
  # Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/WLBLEval.pm: lib/Mail/SpamAssassin/Plugin/WLBLEval.pm: Permission denied at (eval 59) line 1.
  # Nov  8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/VBounce.pm: lib/Mail/SpamAssassin/Plugin/VBounce.pm: Permission denied at (eval 60) line 1.
  # i dont know why, i just found the solution online
  cd /m/md

  getspamdpid
  # first time we write, overwrite anything existing. Only hosts that
  # publish metrics need the temporary file at all.
  if $doprom && [[ -e $prom_dir ]]; then
    printf '%s\n' "mailtest_check_found_spamd_pid_bool $(( ${spamdpid:-0} > 0 ))" >"$prom_tmp"
  fi
  e spamdpid: $spamdpid
  if [[ ! $spamdpid ]]; then
    echo mailtest spamd pid not found. systemctl status spamassassin:
    systemctl status spamassassin
  fi

  # One scratch file for the whole pass; it is truncated on each use and
  # removed when the pass ends, so it is never recreated at a known path.
  tmpfile=$(mktemp)
  for folder in "${folders[@]}"; do
    for from in "${froms[@]}"; do
      missing_dnswl=0
      #declare -i dnsfail=0
      unexpected=0
      latest=
      last_sec=0

      if ! grep -rlFx "From: $from" "$folder"/{new,cur} >"$tmpfile"; then
        echo "no message found from: $from"
        continue
      fi
      # webmail sends them to cur it seems
      while read -r file; do
        file_sec=$(awk '/^Subject: / {print $4}' "$file")
        if [[ $file_sec ]] && (( file_sec > last_sec )); then
          latest=$file
          last_sec="$file_sec"
        fi
      done <"$tmpfile"

      # Without this guard the awk below would get no file argument and
      # read stdin instead.
      if [[ ! $latest ]]; then
        echo "no message with a timestamp in its subject found from: $from"
        continue
      fi

      to=$(awk '/^Envelope-to: / {print $2}' "$latest")

      if $slow; then
        if ! $int; then
          find "$folder/new" "$folder/cur" -type f -mmin +1080 -delete
        fi
        getspamdpid
        if [[ $spamdpid ]]; then
          # Run spamassassin inside spamd's network+mount namespaces when
          # they differ from ours. Reset first so a prefix naming an old
          # pid is never reused.
          spamcpre=
          if [[ $(readlink /proc/$$/ns/net) != "$(readlink /proc/$spamdpid/ns/net)" ]]; then
            spamcpre="nsenter -t $spamdpid -n -m"
          fi
          unset results
          declare -A results
          # pyzor fails for our test message, so dont put useless load on their
          # servers.
          # example line that sed is parsing:
          # (-0.1 / 5.0 requ) DKIM_SIGNED=0.1,DKIM_VALID=-0.1,DKIM_VALID_AU=-0.1,SPF_HELO_PASS=-0.001,SPF_PASS=-0.001,TVD_SPACE_RATIO=0.001 autolearn=_AUTOLEARN
          resultfile=$(mktemp)
          # $spamcpre is intentionally unquoted: it is either empty or a
          # command prefix that must be split into words.
          $spamcpre sudo -u Debian-exim spamassassin -D -t --cf='score PYZOR_CHECK 0' <"$latest" &>"$resultfile"

          # note: on some mail, its 1 line after the send-test-forward, on others its 2 with a blank inbetween.
          # I use the sed -n to filter this.
          raw_results="$(tail "$resultfile" | grep -A2 -Fx /usr/local/bin/send-test-forward | tail -n+2 | sed -nr 's/^\([^)]*\) *//;s/=[^, ]*([, ]|$)/ /gp')"
          # intentionally unquoted: one rule name per word
          for r in $raw_results; do
            case $r in
              # got this in an update 2022-01. dun care
              T_SCC_BODY_TEXT_LINE|SCC_BODY_SINGLE_WORD) : ;;
              # we have a new domain, ignore this.
              # it seems like some versions of spamassassin do BODY_SINGLE_WORD, others dont, we dun care.
              # bayes_00 is a new one indicating ham, we dont care if its missing.
              BAYES_00|BODY_SINGLE_WORD|FROM_FMBLA_NEWDOM*|autolearn) : ;;

              # These have somewhat randomly been added and removed, resulting in useless alerts, so ignore them.
              RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH) : ;;

              SPF_HELO_NEUTRAL)
                # some of my domains use neutral spf, treat them the same.
                results[SPF_HELO_PASS]=t
                ;;
              *)
                results[$r]=t
                ;;
            esac
          done
          # debugging
          # e results = ${!results[@]}
          missing=()

          keys=(DKIM_SIGNED DKIM_VALID{,_AU,_EF} SPF_HELO_PASS SPF_PASS TVD_SPACE_RATIO)
          if [[ $to == *@gnu.org && $from == *@gnu.org ]]; then
            keys=(ALL_TRUSTED TVD_SPACE_RATIO)
            # from eggs had DKIMWL_WL_HIGH sometime in 2022, then DKIMWL_WL_MED unti march 2023
          fi

          # Cross off every expected rule that was reported; what remains
          # in results is unexpected, what was not reported is missing.
          for t in "${keys[@]}"; do
            if [[ ${results[$t]} ]]; then
              unset "results[$t]"
            elif [[ $t == DKIM_VALID_EF && $from == *@[^.]*.[^.]*.[^.]* ]]; then
              :
              # third level domains dont hit this. its because
              # /usr/share/perl5/Mail/SpamAssassin/Plugin/DKIM.pm checks
              # if its signed with the registryboundaries domain. afaik:
              # we need the actual domain to sign it, this would result in
              # a second signature. I only use second level domains for
              # testing atm, fsf doesnt use them for anything but the
              # forum and I dont expect that to have any deliverability
              # problems. So, not bothering atm.
            else
              missing+=("$t")
            fi
          done
          if (( ${#results[@]} || ${#missing[@]} )); then
            printf '%s spamtest %s\n' "$HOSTNAME" "$latest"
            if (( ${#results[@]} )); then
              printf "unexpected %s" "${!results[*]} "
            fi
            if (( ${#missing[@]} )); then
              printf "missing %s" "${missing[*]}"
            fi
            echo # ends our printf string buildup
            cat "$resultfile"
            echo mailtest-check: end of spam debug results
            # lets just handle 1 failure at a time in interactive mode.
            if $int; then
              echo mailtest-check: from: $from, to: $to
              # we are leaving for good: dont leave temp files behind.
              rm -f "$resultfile" "$tmpfile" "$prom_tmp"
              exit 0
            fi

            # less verbose debug output, commented since I might want it another time.
            # if $int; then
            #   echo mailtest-check: cat $latest:
            #   cat $latest
            #   echo mailtest-check: end of cat
            #fi
          fi
          rm -f "$resultfile"
          for r in "${results[@]}"; do
            case $r in
              # iank: for when we want to handle dns errors differently.
              # also uncomment declaration of dnsfail above.
              # DKIM_INVALID|T_SPF_TEMPERROR|T_SPF_HELO_TEMPERROR)
              #   dnsfail+=1
              #   ;;
              *)
                unexpected=$(( unexpected + 1 ))
                ;;
            esac
          done
          for miss in "${missing[@]}"; do
            # At some point we had annoying dns failures that we couldn't solve so we
            # we counted dns fail related results separately and alert differently.
            # DKIM_VALID|DKIM_VALID_AU|DKIM_VALID_EF|SPF_HELO_PASS|SPF_PASS|
            case $miss in
              *)
                unexpected+=1
                ;;
            esac
          done
          p_missing_dnswl+=("mailtest_check_missing_dnswl{folder=\"$folder\",from=\"$from\"} $missing_dnswl")
          p_unexpected_spamd_results+=("mailtest_check_unexpected_spamd_results{folder=\"$folder\",from=\"$from\"} $unexpected")
        fi # if spamdpid
      fi # if $slow

      now=$EPOCHSECONDS
      age_sec=$(( now - last_sec ))
      e $((age_sec / 60)):$(( age_sec % 60 )) ago. to:$to from:$from $latest

      # usec = unix seconds
      p_last_usec+=("mailtest_check_last_usec{folder=\"$folder\",from=\"$from\"} $last_sec")
    done # end for from in ${froms[@]}
  done # end for folder in ${folders[@]}
  rm -f "$tmpfile"

  if $doprom && [[ -e $prom_dir ]]; then
    {
      for l in "${p_unexpected_spamd_results[@]}"; do
        printf "%s\n" "$l"
      done
      for l in "${p_missing_dnswl[@]}"; do
        printf "%s\n" "$l"
      done
      for l in "${p_last_usec[@]}"; do
        printf "%s\n" "$l"
      done
    } >>"$prom_tmp"
    # rename into place so a reader never sees a half-written file
    mv "$prom_tmp" "$prom_dir/mailtest-check.prom"
    # note: node_textfile_mtime_seconds will tell us when this last happened. useful for debugging.
  fi
}
342
# Run main forever, starting one pass every 300 seconds.
#
# The time main itself took is subtracted from the pause. When a pass
# took longer than 300 seconds the next one starts immediately: the pause
# is clamped to 0 because sleep rejects a negative interval.
# Never returns.
loop-main() {
  local premain_sec remaining

  # When running under systemd, the system just started. Be nice and
  # give programs some time to finish their startup.
  sleep 10
  while true; do
    premain_sec=$EPOCHSECONDS
    main
    remaining=$(( 300 - ( EPOCHSECONDS - premain_sec ) ))
    if (( remaining < 0 )); then
      remaining=0
    fi
    sleep "$remaining"
  done
}
353
354
# Entry point: with INVOCATION_ID set in the environment, keep checking in
# a loop; otherwise do a single pass.
entry_point=main
if [[ -n $INVOCATION_ID ]]; then
  entry_point=loop-main
fi
"$entry_point"

exit 0