some fixes, and dns debugging on bk
[distro-setup] / mailtest-check
1 #!/bin/bash
2
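# Usage: mailtest-check [slow] [int|nonint]
#
# slow: do slow checks, like spamassassin
#
# int: force interactive mode (print progress messages)
#
# nonint: force non-interactive mode, dont print unless something went
# wrong. Without int/nonint, we are interactive when run via sudo or ssh.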
9
10
11 source /b/errhandle/err
12
13 [[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"
14
15 shopt -s nullglob
16
# Print a progress message prefixed with the script name, but only in
# interactive mode ($int is the command "true" or "false").
# Arguments: the words of the message, joined with spaces.
e() {
  if $int; then
    printf "mailtest-check: %s\n" "$*"
  fi
}
18
# Set the global $spamdpid to the main pid of the spamassassin service.
#
# Globals: spamdpid (read and written). Left empty when no pid is found.
#
# A still-running cached pid is reused. Otherwise systemd is asked for
# MainPID; the values 0 and 1 are treated as "not found" and blanked.
# Always returns 0.
getspamdpid() {
  local attempt
  local -r max_attempts=2 retry_delay=30

  if [[ $spamdpid && -d /proc/$spamdpid ]]; then
    return 0
  fi

  # try twice in case we are restarting, it happens.
  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    spamdpid=$(systemctl show --property MainPID --value spamassassin | sed 's/^[10]$//' ||:)
    if [[ $spamdpid ]]; then
      return 0
    fi
    # Only wait between attempts; sleeping after the last one just
    # delays the caller without any further retry.
    if (( attempt < max_attempts )); then
      sleep "$retry_delay"
    fi
  done
  return 0
}
# Append stdin to this run's temporary prometheus metrics file.
# Does nothing (and leaves stdin unread) unless $doprom is "true" and
# the node-exporter textfile directory exists.
pr() {
  $doprom || return 0
  [[ -e /var/lib/prometheus/node-exporter ]] || return 0
  cat >>/var/lib/prometheus/node-exporter/mailtest-check.prom.$$
}
36
37
#### begin arg processing ####
# Optional first argument "slow" turns on the slow checks.
# spamassassin checking takes about 8 seconds.
slow=false
case $1 in
  slow)
    slow=true
    shift
    ;;
esac

# Default: interactive when we were started through sudo or over ssh.
int=false
if [[ -n $SUDO_USER || -n $SSH_CONNECTION ]]; then
  int=true
fi

# The next argument can override that default either way.
case $1 in
  int)
    int=true
    ;;
  nonint)
    int=false
    ;;
esac
#### end arg processing ####
59
60 # we put this in to avoid dns errors that happen on reboot,
61 # but I want to debug them.
62 # if ! $int; then
63 # sleep 60
64 # fi
65
66
# TODO: get je to deliver the local mailbox: /m/md/INBOX
# dovecot appears to be set up, but I can't be sure.
69
70 source /a/bin/bash_unpublished/source-state
71
# Decide whether this host exports prometheus metrics: only the current
# $MAIL_HOST and the hosts named bk and je do. Every other host removes
# any mailtest-check metric files that may be lying around.
if [[ $HOSTNAME == $MAIL_HOST || $HOSTNAME == bk || $HOSTNAME == je ]]; then
  doprom=true
else
  doprom=false
  rm -f /var/lib/prometheus/node-exporter/mailtest-check.prom*
fi
81
# Run one complete check pass.
#
# For every (folder, sender) pair configured for this host, find the
# newest test message (by the unix timestamp in its Subject), optionally
# run it through spamassassin and compare the rule hits against the
# expected set, then record the results as prometheus metrics.
#
# Globals read: HOSTNAME, int, slow, doprom (through pr), EPOCHSECONDS.
# Globals written: spamdpid (through getspamdpid).
# Exits the script with status 0 after the first spam-test mismatch when
# running interactively.
main() {
  local -a folders froms keys missing
  local folder from latest last_sec file file_sec to
  local tmpfile resultfile raw_results spamcpre r t miss
  local did_rsync try_start_time try_limit try_left rsync_timeout
  local now age_sec
  local promdir=/var/lib/prometheus/node-exporter
  local promtmp=$promdir/mailtest-check.prom.$$

  case $HOSTNAME in
    bk)
      folders=(/m/md/{expertpathologyreview.com,amnimal.ninja}/testignore)
      froms=(ian@iankelling.org z@zroe.org testignore@je.b8.nz iank@gnu.org)
      ;;
    je)
      froms=(ian@iankelling.org z@zroe.org iank@gnu.org testignore@amnimal.ninja)
      folders=(/m/md/je.b8.nz/testignore)
      ;;
    *)
      folders=(/m/md/l/testignore)
      froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org iank@gnu.org)
      if ! $int; then
        ### begin rsyncing fencepost email ###
        # We dont want to exit if rsync fails, that will get caught by
        # our later test by virtue of not having the latest email.
        did_rsync=false
        try_start_time=$EPOCHSECONDS
        try_limit=140 # somewhat arbitrary value
        while ! $did_rsync; do
          try_left=$(( try_limit - (EPOCHSECONDS - try_start_time) ))
          rsync_timeout=120 # somewhat arbitrary value
          # Stop at 0 as well: timeout(1) treats a duration of 0 as
          # "no timeout", which would let rsync run unbounded.
          if (( try_left <= 0 )); then
            break
          fi
          if (( try_left < rsync_timeout )); then
            rsync_timeout=$try_left
          fi
          if timeout "$rsync_timeout" rsync --chown iank:iank -e "ssh -oIdentitiesOnly=yes -F /dev/null -i /root/.ssh/jtuttle" -t --inplace -r 'jtuttle@fencepost.gnu.org:/home/j/jtuttle/Maildir/new/' /m/md/l/testignore/new; then
            did_rsync=true
          else
            sleep 4
          fi
        done
        if ! $did_rsync; then
          echo mailtest-check: warning: fencepost rsync failed
        fi
        ### end rsyncing fencepost email ###
      fi
      ;;
  esac

  # avoid errors like this:
  # Nov 8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/WLBLEval.pm: lib/Mail/SpamAssassin/Plugin/WLBLEval.pm: Permission denied at (eval 59) line 1.
  # Nov 8 08:16:05.439 [6080] warn: plugin: failed to parse plugin (from @INC): Can't locate Mail/SpamAssassin/Plugin/VBounce.pm: lib/Mail/SpamAssassin/Plugin/VBounce.pm: Permission denied at (eval 60) line 1.
  # i dont know why, i just found the solution online
  cd /m/md

  getspamdpid
  # first time we write, overwrite anything existing
  if [[ -e $promdir ]]; then
    printf '%s\n' "mailtest_check_found_spamd_pid_bool $(( ${spamdpid:-0} > 0 ))" >"$promtmp"
  fi
  e spamdpid: $spamdpid
  if [[ ! $spamdpid ]]; then
    echo mailtest spamd pid not found. systemctl status spamassassin:
    systemctl status spamassassin
  fi

  # One scratch file for the whole pass; each grep below truncates it.
  tmpfile=$(mktemp)
  local -i unexpected=0
  local -i missing_dnswl=0
  local -i dnsfail=0

  for folder in "${folders[@]}"; do
    for from in "${froms[@]}"; do
      latest=
      last_sec=0

      if ! grep -rlFx "From: $from" "$folder"/{new,cur} >"$tmpfile"; then
        echo "no message found from: $from"
        continue
      fi
      # webmail sends them to cur it seems
      while IFS= read -r file; do
        # The 4th word of the Subject line is the send time in unix
        # seconds. Only the first Subject line counts, so a quoted
        # Subject in the body cannot produce a multi-line value.
        file_sec=$(awk '/^Subject: / {print $4; exit}' "$file")
        if [[ $file_sec =~ ^[1-9][0-9]*$ ]] && (( file_sec > last_sec )); then
          latest=$file
          last_sec=$file_sec
        fi
      done <"$tmpfile"

      # Without this guard, awk below would read stdin and spamassassin
      # would be fed a nonexistent file.
      if [[ ! $latest ]]; then
        echo "no message with a timestamp in its subject found from: $from"
        continue
      fi

      to=$(awk '/^Envelope-to: / {print $2; exit}' "$latest")

      if $slow; then
        if ! $int; then
          # NOTE(review): this can delete $latest itself when the newest
          # message is older than 18 hours -- confirm that is intended.
          find "$folder/new" "$folder/cur" -type f -mmin +1080 -delete
        fi
        getspamdpid
        if [[ $spamdpid ]]; then
          # Recomputed every time so a stale nsenter prefix from an
          # earlier spamd pid is never reused.
          spamcpre=
          if [[ $(readlink /proc/$$/ns/net) != "$(readlink "/proc/$spamdpid/ns/net")" ]]; then
            spamcpre="nsenter -t $spamdpid -n -m"
          fi
          unset results
          declare -A results
          # pyzor fails for our test message, so dont put useless load on their
          # servers.
          # example line that sed is parsing:
          # (-0.1 / 5.0 requ) DKIM_SIGNED=0.1,DKIM_VALID=-0.1,DKIM_VALID_AU=-0.1,SPF_HELO_PASS=-0.001,SPF_PASS=-0.001,TVD_SPACE_RATIO=0.001 autolearn=_AUTOLEARN
          resultfile=$(mktemp)
          # $spamcpre is deliberately unquoted: it is either empty or a
          # multi-word command prefix.
          $spamcpre sudo -u Debian-exim spamassassin -D -t --cf='score PYZOR_CHECK 0' <"$latest" &>"$resultfile"

          # note: on some mail, its 1 line after the send-test-forward, on others its 2 with a blank inbetween.
          # I use the sed -n to filter this.
          raw_results="$(tail "$resultfile" | grep -A2 -Fx /usr/local/bin/send-test-forward | tail -n+2 | sed -nr 's/^\([^)]*\) *//;s/=[^, ]*([, ]|$)/ /gp')"
          # Deliberately unquoted: split the rule names into words.
          for r in $raw_results; do
            case $r in
              # got this in an update 2022-01. dun care
              T_SCC_BODY_TEXT_LINE|SCC_BODY_SINGLE_WORD) : ;;
              # we have a new domain, ignore this.
              # it seems like some versions of spamassassin do BODY_SINGLE_WORD, others dont, we dun care.
              # bayes_00 is a new one indicating ham, we dont care if its missing.
              BAYES_00|BODY_SINGLE_WORD|FROM_FMBLA_NEWDOM*|autolearn) : ;;
              SPF_HELO_NEUTRAL)
                # some of my domains use neutral spf, treat them the same.
                results[SPF_HELO_PASS]=t
                ;;
              *)
                results[$r]=t
                ;;
            esac
          done
          # debugging
          # e results = ${!results[@]}
          missing=()

          keys=(DKIM_SIGNED DKIM_VALID{,_AU,_EF} SPF_HELO_PASS SPF_PASS TVD_SPACE_RATIO)
          if [[ $to == *@gnu.org && $from == *@gnu.org ]]; then
            keys=(ALL_TRUSTED TVD_SPACE_RATIO)
          elif [[ $to == *@gnu.org ]]; then
            # eggs has RCVD_IN_DNSWL_MED
            keys+=(RCVD_IN_DNSWL_MED)
          elif [[ $from == *@gnu.org ]]; then
            # eggs has these
            keys+=(RCVD_IN_DNSWL_MED DKIMWL_WL_HIGH)
          fi

          # Every expected key that was hit is removed from results, so
          # what remains in results afterwards is the unexpected hits.
          for t in "${keys[@]}"; do
            if [[ ${results[$t]} ]]; then
              unset "results[$t]"
            elif [[ $t == DKIM_VALID_EF && $from == *@[^.]*.[^.]*.[^.]* ]]; then
              :
              # third level domains dont hit this. its because
              # /usr/share/perl5/Mail/SpamAssassin/Plugin/DKIM.pm checks
              # if its signed with the registryboundaries domain. afaik:
              # we need the actual domain to sign it, this would result in
              # a second signature. I only use second level domains for
              # testing atm, fsf doesnt use them for anything but the
              # forum and I dont expect that to have any deliverability
              # problems. So, not bothering atm.
            else
              missing+=("$t")
            fi
          done
          if (( ${#results[@]} || ${#missing[@]} )); then
            printf '%s spamtest %s\n' "$HOSTNAME" "$latest"
            if (( ${#results[@]} )); then
              printf "unexpected %s" "${!results[*]} "
            fi
            if (( ${#missing[@]} )); then
              printf "missing %s" "${missing[*]}"
            fi
            echo # ends our printf string buildup
            cat "$resultfile"
            echo mailtest-check: end of spam debug results
            # lets just handle 1 failure at a time in interactive mode.
            if $int; then
              echo "mailtest-check: from: $from, to: $to"
              # Dont leave our scratch files behind on this early exit.
              rm -f "$resultfile" "$tmpfile" "$promtmp"
              exit 0
            fi

            # less verbose debug output, commented since I might want it another time.
            # if $int; then
            #   echo mailtest-check: cat $latest:
            #   cat $latest
            #   echo mailtest-check: end of cat
            #   echo "$(tput setaf 5 2>/dev/null ||:)█$(tput sgr0 2>/dev/null||:)%.0s" $(eval echo "{1..${COLUMNS:-60}}")
            # fi
          fi
          rm -f "$resultfile"
          for r in "${!results[@]}"; do
            case $r in
              # iank: for when we want to handle dns errors differently
              # DKIM_INVALID|T_SPF_TEMPERROR|T_SPF_HELO_TEMPERROR)
              #   dnsfail+=1
              #   ;;
              *)
                unexpected+=1
                ;;
            esac
          done
          for miss in "${missing[@]}"; do
            # We expect dns failures from time to time, so
            # we count them separately and alert differently.
            case $miss in
              # iank: dns fail
              # DKIM_VALID|DKIM_VALID_AU|DKIM_VALID_EF|SPF_HELO_PASS|SPF_PASS|
              RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH)
                missing_dnswl+=1
                ;;
              *)
                unexpected+=1
                ;;
            esac
          done
        fi # if spamdpid
      fi # if $slow

      now=$EPOCHSECONDS
      age_sec=$(( now - last_sec ))
      e $((age_sec / 60)):$(( age_sec % 60 )) ago. to:$to from:$from $latest

      # usec = unix seconds
      pr <<EOF
mailtest_check_last_usec{folder="$folder",from="$from"} $last_sec
EOF
    done # end for from in froms
  done # end for folder in folders
  rm -f "$tmpfile"

  if $slow; then
    pr <<EOF
mailtest_check_missing_dnswl $missing_dnswl
mailtest_check_unexpected_spamd_results $unexpected
EOF
  fi

  if [[ -e $promdir ]]; then
    # Publish by rename so a reader never sees a half-written file.
    mv "$promtmp" "$promdir/mailtest-check.prom"
    # note: node_textfile_mtime_seconds will tell us when this last happened. useful for debugging.
  fi
}
318
# Run main forever, starting a new pass every 300 seconds.
#
# The time main itself took is subtracted from the pause. When a pass
# takes longer than the interval the pause is clamped to 0, because
# sleep rejects a negative duration and that error would end the loop.
# Never returns.
loop-main() {
  local -r interval_sec=300
  local premain_sec remaining_sec

  # When running under systemd, the system just started. Be nice and
  # give programs some time to finish their startup.
  sleep 10
  while true; do
    premain_sec=$EPOCHSECONDS
    main
    remaining_sec=$(( interval_sec - (EPOCHSECONDS - premain_sec) ))
    if (( remaining_sec < 0 )); then
      remaining_sec=0
    fi
    sleep "$remaining_sec"
  done
}
329
330
# A non-empty INVOCATION_ID selects the repeating loop (the systemd
# case, per the comment in loop-main); otherwise do a single pass.
if [[ -z $INVOCATION_ID ]]; then
  main
else
  loop-main
fi

exit 0