+parse-rspamd() {
+ # Print the names of the "Symbol:" entries on stdin whose score is not (0.00). Header lines carry the score in $2, as in NAME(2.50); rspamc uses $3.
+ awk '$1 == "Symbol:" && $2 !~ /\(0\.00\)/ && $3 !~ /\(0\.00\)/ {print $2}' | sed 's/(.*//'
+}
+
+rspamc-process() {
+  # Gather the rspamd symbols for the message in $latest, then check them
+  # against the symbols we expect on every good test message.
+  #
+  # Globals read:    to, latest, spamcpre
+  # Globals written: raw_results, r, t, keys, results, missing
+  #
+  # results is indexed by symbol name (presumably declared as an
+  # associative array by the caller -- confirm). Each expected symbol that
+  # is present is removed from results again, so what is left in results
+  # afterwards are symbols that were seen but not expected. Each expected
+  # symbol that is absent is appended to missing.
+
+  # note, this could in theory break since we aren't limiting it to the
+  # specific header. but that is unlikely, I'm doing all the header generation.
+  # example header:
+  # X-Spam_report: Action: no action
+  # Symbol: HFILTER_HOSTNAME_UNKNOWN(2.50)
+  # Symbol: RCVD_COUNT_TWO(0.00)
+  # Symbol: FROM_EQ_ENVFROM(0.00)
+  # Symbol: DMARC_POLICY_ALLOW(-0.50)
+  # Symbol: TO_DN_NONE(0.00)
+  # Symbol: TO_MATCH_ENVRCPT_SOME(0.00)
+  # Symbol: RCVD_TLS_LAST(0.00)
+  # Symbol: RBL_SENDERSCORE_FAIL(0.00)
+  # Symbol: R_DKIM_ALLOW(-0.20)
+  # Symbol: MIME_GOOD(-0.10)
+  # Symbol: MID_RHS_MATCH_FROM(0.00)
+  # Symbol: RCVD_IN_DNSWL_FAIL(0.00)
+  # Symbol: SINGLE_SHORT_PART(0.00)
+  # Symbol: R_SPF_ALLOW(-0.20)
+  # Symbol: ARC_NA(0.00)
+  # Symbol: ASN(0.00)
+  # Symbol: FROM_NO_DN(0.00)
+  # Symbol: MIME_TRACE(0.00)
+  # Symbol: MISSING_XM_UA(0.00)
+  # Symbol: RCPT_COUNT_THREE(0.00)
+  # Symbol: DKIM_TRACE(0.00)
+  # Message-ID: E1sLckD-004Ucv-P2@je.b8.nz
+
+  if [[ $to == jtuttle@gnu.org ]]; then
+    # For this recipient the message is scanned with rspamc right here.
+    # $spamcpre is left unquoted on purpose so it word-splits into a
+    # command prefix (it is defined outside this function).
+    # |& feeds rspamc's stderr through the parser along with its stdout.
+    raw_results=$($spamcpre sudo -u _rspamd rspamc --helo=mail.iankelling.org --hostname=mail.iankelling.org <"$latest" |& parse-rspamd)
+  else
+    # Otherwise parse the message file itself; presumably it already
+    # carries a report header like the example above.
+    raw_results=$(parse-rspamd <"$latest")
+  fi
+
+  # $raw_results is unquoted on purpose: it is split into one word per symbol.
+  for r in $raw_results; do
+    case $r in
+      # based on my spamassassin experience, these may change and are not important.
+      RCVD_IN_DNSWL_MED|RCVD_DKIM_ARC_DNSWL_MED) : ;;
+      *)
+        results[$r]=t
+        ;;
+    esac
+  done
+
+  # Symbols that must be present.
+  keys=(DMARC_POLICY_ALLOW R_DKIM_ALLOW MIME_GOOD R_SPF_ALLOW)
+  for t in "${keys[@]}"; do
+    if [[ ${results[$t]} ]]; then
+      # Expected and present: drop it so only unexpected symbols remain.
+      unset "results[$t]"
+    else
+      missing+=("$t")
+    fi
+  done
+}
+
+spamc-process() {
+  # Run spamassassin on the message in $latest, extract the names of the
+  # rules it hit, and check them against the rules we expect.
+  #
+  # Globals read:    spamcpre, latest, to, from
+  # Globals written: resultstr, raw_results, r, t, keys, results, missing
+  #
+  # results is indexed by rule name (presumably declared as an associative
+  # array by the caller -- confirm). Each expected rule that is present is
+  # removed from results again, so what is left in results afterwards are
+  # rules that were hit but not expected. Each expected rule that is
+  # absent is appended to missing.
+
+  # pyzor fails for our test message, so don't put useless load on their
+  # servers.
+  # example line that sed is parsing:
+  # (-0.1 / 5.0 requ) DKIM_SIGNED=0.1,DKIM_VALID=-0.1,DKIM_VALID_AU=-0.1,SPF_HELO_PASS=-0.001,SPF_PASS=-0.001,TVD_SPACE_RATIO=0.001 autolearn=_AUTOLEARN
+  # add -D for debug info. I haven't found it to be useful so it is off by default
+  # $spamcpre is left unquoted on purpose so it word-splits into a command
+  # prefix (it is defined outside this function).
+  resultstr=$($spamcpre sudo -u Debian-exim spamassassin -t --cf='score PYZOR_CHECK 0' <"$latest" 2>&1)
+  #resultstr=$($spamcpre sudo -u _rspamd rspamc <"$latest" 2>&1)
+
+  # note: on some mail, it's 1 line after the send-test-forward,
+  # on others it's 2 with a blank in between. I use the sed -n to
+  # filter this.
+  ## spamassassin parsing. disabled, using rspamd
+  # Pipeline: keep the last lines of the output, take the up to two lines
+  # following the exact line /usr/local/bin/send-test-forward, strip the
+  # leading "(... requ) " summary, and turn every "=value" into a space.
+  # Because of sed -n with the p flag, only lines that contained a
+  # "=value" are printed.
+  raw_results="$(printf "%s\n" "$resultstr"| tail | grep -A2 -Fx /usr/local/bin/send-test-forward | tail -n+2 | sed -nr 's/^\([^)]*\) *//;s/=[^, ]*([, ]|$)/ /gp')"
+
+  # consider results we want to ignore or pre-process in some way.
+  # $raw_results is unquoted on purpose: it is split into one word per rule.
+  for r in $raw_results; do
+    case $r in
+      # This came in t12, but it's just dkim + spf, and my
+      # systems aren't all t12, so ignore it for now.
+      DMARC_PASS) : ;;
+      # got this in an update 2022-01. don't care
+      T_SCC_BODY_TEXT_LINE|SCC_BODY_SINGLE_WORD) : ;;
+      # we have a new domain, ignore this.
+      # it seems like some versions of spamassassin do BODY_SINGLE_WORD, others don't, we don't care.
+      # bayes_00 is a new one indicating ham, we don't care if it's missing.
+      BAYES_00|BODY_SINGLE_WORD|FROM_FMBLA_NEWDOM*|autolearn) : ;;
+
+      # These have somewhat randomly been added and removed, resulting in useless alerts, so ignore them.
+      RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH) : ;;
+
+      SPF_HELO_NEUTRAL)
+        # some of my domains use neutral spf, treat them the same.
+        results[SPF_HELO_PASS]=t
+        ;;
+      *)
+        results[$r]=t
+        ;;
+    esac
+  done
+  # debugging
+  # e results = ${!results[@]}
+
+  # Rules that must be present.
+  keys=(DKIM_SIGNED DKIM_VALID{,_AU,_EF} SPF_HELO_PASS SPF_PASS TVD_SPACE_RATIO)
+  if [[ $to == *@gnu.org && $from == *@gnu.org ]]; then
+    keys=(ALL_TRUSTED TVD_SPACE_RATIO)
+    # from eggs had DKIMWL_WL_HIGH sometime in 2022, then DKIMWL_WL_MED until march 2023
+  fi
+
+  for t in "${keys[@]}"; do
+    if [[ ${results[$t]} ]]; then
+      # Expected and present: drop it so only unexpected rules remain.
+      unset "results[$t]"
+    elif [[ $t == DKIM_VALID_EF && $from == *@[^.]*.[^.]*.[^.]* ]]; then
+      # The pattern matches a from address with at least two dots after
+      # an @; a missing DKIM_VALID_EF is tolerated for those.
+      :
+      # third level domains don't hit this. it's because
+      # /usr/share/perl5/Mail/SpamAssassin/Plugin/DKIM.pm checks
+      # if it's signed with the registryboundaries domain. afaik:
+      # we need the actual domain to sign it, this would result in
+      # a second signature. I only use second level domains for
+      # testing atm, fsf doesn't use them for anything but the
+      # forum and I don't expect that to have any deliverability
+      # problems. So, not bothering atm.
+    else
+      missing+=("$t")
+    fi
+  done
+}