From: Ian Kelling Date: Mon, 24 Jun 2024 10:09:57 +0000 (-0400) Subject: use rspamd, speed up mailtest-check X-Git-Url: https://iankelling.org/git/?a=commitdiff_plain;h=85cfa73bb1a7991183b31ddb1dddea9364fe72c3;p=distro-setup use rspamd, speed up mailtest-check --- diff --git a/mail-setup b/mail-setup index 0e0c6d0..9ad7eea 100755 --- a/mail-setup +++ b/mail-setup @@ -476,7 +476,7 @@ fi # light version of exim does not have sasl auth support. # note: for bitfolk hosts, unbound has important config with conflink. -pi-nostart exim4 exim4-daemon-heavy spamassassin unbound clamav-daemon wireguard +pi-nostart exim4 exim4-daemon-heavy spamassassin unbound clamav-daemon wireguard rspamd spamd_remove=spamassassin spamd_ser=spamd @@ -881,7 +881,7 @@ nn_progs=(exim4) if mailhost; then # Note dovecots lmtp doesnt need to be in the same nn to accept delivery. # Its in the nn so remote clients can connect to it. - nn_progs+=($spamd_ser dovecot) + nn_progs+=($spamd_ser rspamd dovecot) fi case $HOSTNAME in @@ -961,7 +961,7 @@ EOF done ;; *) - for unit in exim4 $spamd_ser $spamd_remove dovecot unbound; do + for unit in exim4 $spamd_ser rspamd $spamd_remove dovecot unbound; do f=/etc/systemd/system/$unit.service.d/nn.conf if [[ -s $f ]]; then rm -fv $f @@ -984,6 +984,19 @@ RestartSec=20 EOF fi +# * rspamd config + +#/a/exe/cedit /etc/redis/redis.conf <<'EOF' +# redis config is only readable by redis. if we wanted to not do +# that for our modifications, we could add this. +# include /etc/redis-local.conf + +# if we wanted to, we could run redis outside the mail nn by adding to +# its bind config option like this, and then tell rspamd to connect to +# this address. But it is slightly simpler to not do that. +# bind 127.0.0.1 -::1 10.173.8.1 +#EOF + # * spamassassin config u /etc/sysctl.d/80-iank-mail.conf <<'EOF' # see exim spec @@ -1480,6 +1493,7 @@ acl_not_smtp = acl_check_not_smtp DEBBUGS_DOMAIN = b.b8.nz +spamd_address = 127.0.0.1 11333 variant=rspamd EOF if dpkg --compare-versions "$(dpkg-query -f='${Version}\n' --show exim4)" ge 4.94; then @@ -1525,6 +1539,14 @@ EOF rm -fv /etc/exim4/data_local_acl # old path u /etc/exim4/conf.d/data_local_acl <<'EOF' + +warn + remove_header = X-Spam_score: X-Spam_score_int : X-Spam_bar : X-Spam_report + +warn + !hosts = +iank_trusted + # Smarthosts connect with residential ips and thus get flagged as spam if we do a spam check. + !authenticated = plain_server:login_server # Except for the "condition =", this was # a comment in the check_data acl. The comment about this not # being suitable has been changed in newer exim versions. The only thing @@ -1535,14 +1557,19 @@ u /etc/exim4/conf.d/data_local_acl <<'EOF' # suggested in official docs, and 100k in the wiki example because # those docs are rather old and I see a 110k spam message # pretty quickly looking through my spam folder. + condition = ${if < {$message_size}{5000K}} + spam = Debian-exim:true + add_header = X-Spam_score_int: $spam_score_int + add_header = X-Spam_score: $spam_score + add_header = X-Spam_bar: $spam_bar + add_header = X-Spam_report: $spam_report + add_header = X-Spam_action: $spam_action +# i don't want mail to myself getting wastefully scanned or +# mistakenly flagged as spam, but I do want to scan my spam test emails. warn - !hosts = +iank_trusted - remove_header = X-Spam_score: X-Spam_score_int : X-Spam_bar : X-Spam_report - -warn - !hosts = +iank_trusted - # Smarthosts connect with residential ips and thus get flagged as spam if we do a spam check. + condition = ${if forany{<, $recipients}{match{$item}{\N^testignore@\N}}} + hosts = +iank_trusted !authenticated = plain_server:login_server condition = ${if < {$message_size}{5000K}} spam = Debian-exim:true @@ -4081,7 +4108,7 @@ case $HOSTNAME in ;;& $MAIL_HOST|bk|je) # start spamassassin/dovecot before exim. - sre dovecot $spamd_ser mailtest-check + sre dovecot rspamd mailtest-check # Wait a bit before restarting exim, else I get a paniclog entry # like: spam acl condition: all spamd servers failed. But I'm tired # of waiting. I'll deal with this some other way. @@ -4115,7 +4142,7 @@ case $HOSTNAME in : ;; *) - soff radicale mailclean.timer dovecot $spamd_ser $vpnser mailnn clamav-daemon + soff radicale mailclean.timer dovecot $spamd_ser rspamd $vpnser mailnn clamav-daemon ;; esac diff --git a/mailtest-check b/mailtest-check index 9f37cb9..aa9b776 100755 --- a/mailtest-check +++ b/mailtest-check @@ -52,8 +52,136 @@ getspamdpid() { fi } +parse-rspamd() { + # rspamc uses $3. + awk '$1 == "Symbol:" && $2 !~ /\(0\.00\)/ && $3 !~ /\(0\.00\)/ {print $2}' | sed 's/(.*//' +} + +rspamc-process() { + + # note, this could in theory break since we aren't limiting it to the + # specific header. but that is unlikely, I'm doing all the header generation. + # example header: + # X-Spam_report: Action: no action + # Symbol: HFILTER_HOSTNAME_UNKNOWN(2.50) + # Symbol: RCVD_COUNT_TWO(0.00) + # Symbol: FROM_EQ_ENVFROM(0.00) + # Symbol: DMARC_POLICY_ALLOW(-0.50) + # Symbol: TO_DN_NONE(0.00) + # Symbol: TO_MATCH_ENVRCPT_SOME(0.00) + # Symbol: RCVD_TLS_LAST(0.00) + # Symbol: RBL_SENDERSCORE_FAIL(0.00) + # Symbol: R_DKIM_ALLOW(-0.20) + # Symbol: MIME_GOOD(-0.10) + # Symbol: MID_RHS_MATCH_FROM(0.00) + # Symbol: RCVD_IN_DNSWL_FAIL(0.00) + # Symbol: SINGLE_SHORT_PART(0.00) + # Symbol: R_SPF_ALLOW(-0.20) + # Symbol: ARC_NA(0.00) + # Symbol: ASN(0.00) + # Symbol: FROM_NO_DN(0.00) + # Symbol: MIME_TRACE(0.00) + # Symbol: MISSING_XM_UA(0.00) + # Symbol: RCPT_COUNT_THREE(0.00) + # Symbol: DKIM_TRACE(0.00) + # Message-ID: E1sLckD-004Ucv-P2@je.b8.nz + + if [[ $to == jtuttle@gnu.org ]]; then + raw_results=$($spamcpre sudo -u _rspamd rspamc --helo=mail.iankelling.org --hostname=mail.iankelling.org <"$latest" |& parse-rspamd) + else + raw_results=$( parse-rspamd <"$latest") + fi + for r in $raw_results; do + case $r in + # based on my spamassassin experience, these may change and are not important. + RCVD_IN_DNSWL_MED|RCVD_DKIM_ARC_DNSWL_MED) : ;; + *) + results[$r]=t + ;; + esac + done + keys=(DMARC_POLICY_ALLOW R_DKIM_ALLOW MIME_GOOD R_SPF_ALLOW) + for t in ${keys[@]}; do + if [[ ${results[$t]} ]]; then + unset "results[$t]" + else + missing+=($t) + fi + done +} + +spamc-process() { + # pyzor fails for our test message, so dont put useless load on their + # servers. + # example line that sed is parsing: + # (-0.1 / 5.0 requ) DKIM_SIGNED=0.1,DKIM_VALID=-0.1,DKIM_VALID_AU=-0.1,SPF_HELO_PASS=-0.001,SPF_PASS=-0.001,TVD_SPACE_RATIO=0.001 autolearn=_AUTOLEARN + # add -D for debug info. i haven't found it to be useful so it is off by default + resultstr=$($spamcpre sudo -u Debian-exim spamassassin -t --cf='score PYZOR_CHECK 0' <"$latest" 2>&1) + #resultstr=$($spamcpre sudo -u _rspamd rspamc <"$latest" 2>&1) + + # note: on some mail, its 1 line after the send-test-forward, + # on others its 2 with a blank in between. I use the sed -n to + # filter this. + ## spamassassin parsing. disabled, using rspamd + raw_results="$(printf "%s\n" "$resultstr"| tail | grep -A2 -Fx /usr/local/bin/send-test-forward | tail -n+2 | sed -nr 's/^\([^)]*\) *//;s/=[^, ]*([, ]|$)/ /gp')" + + # consider results we want to ignore or pre-process in some way. + for r in $raw_results; do + case $r in + # This came in t12, but its just dkim + spf, and my + # systems aren't all t12, so ignore it for now. + DMARC_PASS) : ;; + # got this in an update 2022-01. dun care + T_SCC_BODY_TEXT_LINE|SCC_BODY_SINGLE_WORD) : ;; + # we have a new domain, ignore this. + # it seems like some versions of spamassassin do BODY_SINGLE_WORD, others dont, we dun care. + # bayes_00 is a new one indicating ham, we dont care if its missing. + BAYES_00|BODY_SINGLE_WORD|FROM_FMBLA_NEWDOM*|autolearn) : ;; + + # These have somewhat randomly been added and removed, resulting in useless alerts, so ignore them. + RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH) : ;; + + SPF_HELO_NEUTRAL) + # some of my domains use neutral spf, treat them the same. + results[SPF_HELO_PASS]=t + ;; + *) + results[$r]=t + ;; + esac + done + # debugging + # e results = ${!results[@]} + + keys=(DKIM_SIGNED DKIM_VALID{,_AU,_EF} SPF_HELO_PASS SPF_PASS TVD_SPACE_RATIO) + if [[ $to == *@gnu.org && $from == *@gnu.org ]]; then + keys=(ALL_TRUSTED TVD_SPACE_RATIO) + # from eggs had DKIMWL_WL_HIGH sometime in 2022, then DKIMWL_WL_MED unti march 2023 + fi + + for t in ${keys[@]}; do + if [[ ${results[$t]} ]]; then + unset "results[$t]" + elif [[ $t == DKIM_VALID_EF && $from == *@[^.]*.[^.]*.[^.]* ]]; then + : + # third level domains dont hit this. its because + # /usr/share/perl5/Mail/SpamAssassin/Plugin/DKIM.pm checks + # if its signed with the registryboundaries domain. afaik: + # we need the actual domain to sign it, this would result in + # a second signature. I only use second level domains for + # testing atm, fsf doesnt use them for anything but the + # forum and I dont expect that to have any deliverability + # problems. So, not bothering atm. + else + missing+=($t) + fi + done +} #### begin arg processing #### + +do_spama=false + # spamassassin checking takes about 8 seconds. slow=false if [[ $1 == slow ]]; then @@ -87,10 +215,11 @@ fi maini=0 -spamd_ser=spamd -if systemctl cat spamassassin &>/dev/null; then - spamd_ser=spamassassin -fi +# spamd_ser=spamd +# if systemctl cat spamassassin &>/dev/null; then +# spamd_ser=spamassassin +# fi +spamd_ser=rspamd source /a/bin/bash_unpublished/source-state @@ -119,10 +248,7 @@ main() { *) folders=(/m/md/l/testignore) # save some cpu cycles - froms=(testignore@je.b8.nz ian@iankelling.org) - if (( maini % 10 == 0 )); then - froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org) - fi + froms=(testignore@je.b8.nz testignore@expertpathologyreview.com testignore@amnimal.ninja ian@iankelling.org z@zroe.org) if ! $int; then ### begin rsyncing fencepost email ### # We dont want to exit if rsync fails, that will get caught by @@ -177,26 +303,20 @@ EOF tmpfile=$(mktemp) declare -i unexpected=0 for folder in ${folders[@]}; do + awk '/^Subject: / {t=$4}; /^From: / {f=$2}; ENDFILE {print t, f, FILENAME}' $folder/new/* $folder/cur/* | sort -rn >$tmpfile for from in ${froms[@]}; do declare -i missing_dnswl=0 #declare -i dnsfail=0 declare -i unexpected=0 latest= last_sec=0 + tmp=$(awk '$2 == "'$from'" {print $1,$3; exit}' $tmpfile) + read -r last_sec latest <<<"$tmp" + if [[ ! $latest ]]; then - if ! grep -rlFx "From: $from" $folder/{new,cur} >$tmpfile; then echo "no message found from: $from" continue fi - # webmail sends them to cur it seems - while read -r file; do - file_sec=$(awk '/^Subject: / {print $4}' $file) - if [[ $file_sec ]] && (( file_sec > last_sec )); then - latest=$file - last_sec="$file_sec" - fi - done <$tmpfile - rm -f $tmpfile to=$(awk '/^Envelope-to: / {print $2}' $latest) @@ -209,70 +329,20 @@ EOF if [[ $(readlink /proc/$$/ns/net) != "$(readlink /proc/$spamdpid/ns/net)" ]]; then spamcpre="nsenter -t $spamdpid -n -m" fi + missing=() unset results declare -A results - # pyzor fails for our test message, so dont put useless load on their - # servers. - # example line that sed is parsing: - # (-0.1 / 5.0 requ) DKIM_SIGNED=0.1,DKIM_VALID=-0.1,DKIM_VALID_AU=-0.1,SPF_HELO_PASS=-0.001,SPF_PASS=-0.001,TVD_SPACE_RATIO=0.001 autolearn=_AUTOLEARN - resultfile=$(mktemp) - # add -D for debug info. usually it - $spamcpre sudo -u Debian-exim spamassassin -t --cf='score PYZOR_CHECK 0' <"$latest" &>$resultfile - - # note: on some mail, its 1 line after the send-test-forward, on others its 2 with a blank inbetween. - # I use the sed -n to filter this. - raw_results="$(tail $resultfile | grep -A2 -Fx /usr/local/bin/send-test-forward | tail -n+2 | sed -nr 's/^\([^)]*\) *//;s/=[^, ]*([, ]|$)/ /gp')" - for r in $raw_results; do - case $r in - # This came in t12, but its just dkim + spf, and my - # systems aren't all t12, so ignore it for now. - DMARC_PASS) : ;; - # got this in an update 2022-01. dun care - T_SCC_BODY_TEXT_LINE|SCC_BODY_SINGLE_WORD) : ;; - # we have a new domain, ignore this. - # it seems like some versions of spamassassin do BODY_SINGLE_WORD, others dont, we dun care. - # bayes_00 is a new one indicating ham, we dont care if its missing. - BAYES_00|BODY_SINGLE_WORD|FROM_FMBLA_NEWDOM*|autolearn) : ;; - - # These have somewhat randomly been added and removed, resulting in useless alerts, so ignore them. - RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH) : ;; - - SPF_HELO_NEUTRAL) - # some of my domains use neutral spf, treat them the same. - results[SPF_HELO_PASS]=t - ;; - *) - results[$r]=t - ;; - esac - done - # debugging - # e results = ${!results[@]} - missing=() + # It would be useful for debugging & development to optionally + # run rspamc here but I haven't totally figured out + # rspamc, i might need to pass --helo=helo_string to avoid + # hostname_unknown result. - keys=(DKIM_SIGNED DKIM_VALID{,_AU,_EF} SPF_HELO_PASS SPF_PASS TVD_SPACE_RATIO) - if [[ $to == *@gnu.org && $from == *@gnu.org ]]; then - keys=(ALL_TRUSTED TVD_SPACE_RATIO) - # from eggs had DKIMWL_WL_HIGH sometime in 2022, then DKIMWL_WL_MED unti march 2023 + if $do_spama; then + spamc-process + else + rspamc-process fi - for t in ${keys[@]}; do - if [[ ${results[$t]} ]]; then - unset "results[$t]" - elif [[ $t == DKIM_VALID_EF && $from == *@[^.]*.[^.]*.[^.]* ]]; then - : - # third level domains dont hit this. its because - # /usr/share/perl5/Mail/SpamAssassin/Plugin/DKIM.pm checks - # if its signed with the registryboundaries domain. afaik: - # we need the actual domain to sign it, this would result in - # a second signature. I only use second level domains for - # testing atm, fsf doesnt use them for anything but the - # forum and I dont expect that to have any deliverability - # problems. So, not bothering atm. - else - missing+=($t) - fi - done if (( ${#results[@]} || ${#missing[@]} )); then printf "$HOSTNAME spamtest %s\n" "$latest" if (( ${#results[@]} )); then @@ -282,7 +352,7 @@ EOF printf "missing %s" "${missing[*]}" fi echo # ends our printf string buildup - cat $resultfile + if [[ $resultstr ]]; then printf "%s\n" "$resultstr"; fi echo mailtest-check: end of spam debug results # lets just handle 1 failure at a time in interactive mode. if $int; then @@ -297,7 +367,6 @@ EOF # echo mailtest-check: end of cat #fi fi - rm -f $resultfile for r in ${results[@]}; do case $r in # iank: for when we want to handle dns errors differently. @@ -339,6 +408,7 @@ mailtest_check_last_usec{folder="$folder",from="$from"} $last_sec EOF done # end for from in ${froms[@]} done # end for folder in ${folders[@]} + rm -f $tmpfile dir=/var/lib/prometheus/node-exporter path=$dir/mailtest-check.prom.$$ diff --git a/subdir_files/.local/share/konsole/profileian.profile b/subdir_files/.local/share/konsole/profileian.profile index 55cd61e..2893b26 100644 --- a/subdir_files/.local/share/konsole/profileian.profile +++ b/subdir_files/.local/share/konsole/profileian.profile @@ -15,7 +15,7 @@ SemanticInputClick=true SemanticUpDown=false [Interaction Options] -OpenLinksByDirectClickEnabled=true +OpenLinksByDirectClickEnabled=false TextEditorCmd=6 TextEditorCmdCustom=/a/exe/g +LINE:COLUMN PATH UnderlineFilesEnabled=true diff --git a/subdir_files/sieve/maintest.sieve b/subdir_files/sieve/maintest.sieve index fefbd71..73f409d 100644 --- a/subdir_files/sieve/maintest.sieve +++ b/subdir_files/sieve/maintest.sieve @@ -3,15 +3,6 @@ ## require [ "regex", "variables", "fileinto", "envelope", "mailbox", "imap4flags", "include" ]; -# many examples out there check for "X-Spam-Status" "^Yes", but we do -# this in exim, which doesn't add that by default. We could modify it's -# config to add $spam_action to a header, like other headers, but simply -# using an integer threshold here is simpler: the default threshold for -# spamassassin is 5, so we have 5 plus symbols here. -if header :regex "x-spam_bar" "^\\+{5}" { - fileinto :create "Junk"; - stop; -} include :personal "personaltest"; include :personal "liststest";