#!/bin/bash
# Copyright (C) 2019 Ian Kelling
# SPDX-License-Identifier: AGPL-3.0-or-later

# The exim panic log regularly gets some entries in it that we don't
# want to fix. Detect them, move them to "$pl-archive", and delete them
# from the panic log.
#
# Usage: run with no arguments for normal operation; any non-empty first
# argument turns on debug output. Re-executes itself through sudo when
# not already running as root.

if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi
shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4
set -eE -o pipefail
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR

[[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"

debug=false
if [[ -n ${1:-} ]]; then
  debug=true
fi

# Print the arguments on one line, only when debugging is enabled.
d() {
  if $debug; then
    printf "%s\n" "$*"
  fi
}

pl=/var/log/exim4/paniclog

# Make sure the archive file exists with group adm and mode 664 before
# anything is appended to it. Previously this only happened in the last
# section of main, so an earlier ">>" could create the file with default
# ownership and mode.
ensure_archive() {
  if [[ ! -w $pl-archive ]]; then
    touch -- "$pl-archive"
    chgrp adm -- "$pl-archive"
    chmod 664 -- "$pl-archive"
  fi
}

# Append every panic log line matching a regex to the archive, then
# delete those lines from the panic log.
# Arguments: [-E] REGEX
#   -E     treat REGEX as an extended regex (grep -E / sed -r);
#          otherwise it is a basic regex.
#   REGEX  must not contain "/", since it is used as a sed address.
archive_and_delete() {
  local -a grep_opts=() sed_opts=(-i)
  if [[ ${1:-} == -E ]]; then
    grep_opts=(-E)
    sed_opts=(-r -i)
    shift
  fi
  local regex=$1
  ensure_archive
  # grep exits non-zero when nothing matches; that is not an error here.
  grep "${grep_opts[@]}" -- "$regex" "$pl" >>"$pl-archive" ||:
  sed "${sed_opts[@]}" -e "/$regex/d" -- "$pl"
}

# One cleaning pass over the panic log. Returns immediately when the log
# is missing or empty.
main() {
  local regex now_s newlines count day time log_s
  local service found wipe d1 d2 tmptime logtime
  local sec_min sec_max jmin jmax description jrregex

  if [[ ! -s $pl ]]; then
    return 0
  fi

  # example line:
  # 2022-02-09 22:08:14.683 [59759] socket bind() to port 25 for address 10.8.0.28 failed: Cannot assign requested address: daemon abandoned
  if [[ -e /etc/systemd/system/exim4.service.d/backup.conf ]]; then
    archive_and_delete "socket bind() to port 25 for address"
  fi

  # seems to randomly be caused by
  # Starting exim4-base housekeeping, exim4-base.service
  archive_and_delete "^[^ ]* 00:00:0.* Failed writing transport results to pipe: Broken pipe$"

  ## begin broken pipe & write lock ##
  regex="Failed to get write lock\|Failed writing transport results to pipe: Broken pipe$"
  now_s=$(date +%s)
  newlines=false
  count=0
  while read -r day time _; do
    log_s=$(date -d "$day $time" +%s)
    count=$((count+1))
    # A line is "new" when it was logged within the last 5 minutes.
    # (The comparison used to be inverted, flagging lines OLDER than
    # 5 minutes, which contradicted the rule described below.)
    if (( log_s > now_s - 300 )); then
      newlines=true
    fi
  done < <(grep -- "$regex" "$pl" ||:)
  if (( count )); then
    # I see broken pipe in groups of 3 for the same message around once a day
    # randomly. I'm guessing they are related to running 2 instances of
    # exim which share the same spool. So, if we have some, but not in
    # the last 5 minutes, and less than 20, it should be fine to clear
    # them. write lock happens less but can fit under the same rule.
    if (( count > 20 )); then
      # Too many to ignore: print the whole panic log instead of wiping.
      cat -- "$pl"
    elif ! $newlines; then
      archive_and_delete "$regex"
    fi
  fi
  ## end broken pipe ##

  # Each here-doc line below is "<systemd unit> <regex>". Panic log lines
  # matching the regex are wiped only if every sampled occurrence is
  # within 60 seconds of a journal entry showing the unit (re)starting.
  while read -r service regex; do
    found=false
    wipe=true
    # Reset per service and per pass. A stale value from a previous
    # service or a previous main run made every line hit "continue",
    # so lines were wiped without any journal check.
    logtime=
    d "$service $regex"
    while read -r d1 d2; do
      d "$d1 $d2"
      found=true
      tmptime=$(date -d "$d1 $d2" +%s)
      # don't consider every matching line, just those in > 60 second intervals
      if [[ -z $logtime ]]; then
        logtime=$tmptime
      elif (( tmptime > logtime + 60 )); then
        logtime=$tmptime
      else
        continue
      fi
      sec_min=$((logtime - 60))
      sec_max=$((logtime + 60))
      jmin="$(date -d "@$sec_min" "+%F %H:%M:%S")"
      jmax="$(date -d "@$sec_max" "+%F %H:%M:%S")"
      description=$(systemctl cat "$service" | sed -rn 's/^ *Description=(.*)/\1/p')
      jrregex="^Starting $description"
      if [[ $service == spamassassin ]]; then
        jrregex+="\|^spamd: restarting"
      fi
      d "jrregex=$jrregex jmin=$jmin jmax=$jmax"
      # the sed clears out the initial time and process+pid
      # NOTE(review): grep output is discarded instead of using -q,
      # presumably so grep reads all input and the pipeline does not
      # fail under pipefail from an early exit -- confirm before changing.
      if ! journalctl -u "$service" -S "$jmin" -U "$jmax" \
          | sed -r 's/^([^[:space:]]*[[:space:]]+){5}//' \
          | grep -- "$jrregex" &>/dev/null; then
        wipe=false
        break
      fi
    done < <(awk -v re="$regex" '$0 ~ re {print $1, $2}' "$pl")
    if $found && $wipe; then
      d "wiping $regex"
      archive_and_delete -E "$regex"
    fi
  done <<'EOF'
clamav-daemon malware acl condition
spamassassin spam acl condition
EOF
}

if [[ -n ${INVOCATION_ID:-} ]]; then
  # INVOCATION_ID is set, which we take to mean we were started by
  # systemd. Loop inside one invocation instead of being started every
  # 30 seconds: this is to prevent systemd from filling up the journal.
  for (( runcount=0; runcount < 100; runcount++ )); do
    main
    sleep 30
  done
else
  main
fi