# Copyright (C) 2019 Ian Kelling
# SPDX-License-Identifier: AGPL-3.0-or-later
# The panic log regularly gets some stuff in it we don't want to fix.
# Detect it and wipe it out.
# Refuse to run under anything other than bash: the rest of the script uses
# bash-only constructs ([[ ]], shopt, BASH_SOURCE, BASH_COMMAND).
if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi

# Let command substitutions inherit errexit where the option exists.
shopt -s inherit_errexit 2>/dev/null || : # ignore fail in bash < 4.4

# On any ERR, report the script name, line, failing command and its status.
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR

# Re-execute this script under sudo (keeping the environment and the original
# arguments) unless we are already running as root.
[[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"

# Path of the exim panic log inspected by the rest of the script.
pl=/var/log/exim4/paniclog
# When the panic log does not exist or is empty (the -s test fails), write the
# exim_paniclog metric line to the node-exporter .prom file. The redirect
# target path is spread over the lines that follow the echo.
# NOTE(review): pr_metric is assigned outside this view, and the remainder of
# this branch (including its closing fi) is not visible here -- confirm
# against the complete script before relying on this description.
38 if [[ ! -s $pl ]]; then
39 echo "exim_paniclog $pr_metric" >/var
/lib
/prometheus
/node-exporter
/exim_paniclog.prom
# Example of the message removed here:
# 2022-02-09 22:08:14.683 [59759] socket bind() to port 25 for address 10.8.0.28 failed: Cannot assign requested address: daemon abandoned
#
# Only when the exim4 backup.conf systemd drop-in exists: copy the matching
# bind-failure lines to the archive, then delete them from the panic log.
# v is defined outside this block.
# NOTE(review): the two closing fi lines were not present in the fragmented
# text and are restored here as the minimal closers -- confirm no else branch
# was lost.
# NOTE(review): the inner condition is the status of the grep|tee pipeline, so
# it only reflects a no-match grep when pipefail is enabled elsewhere in the
# script -- confirm.
if [[ -e /etc/systemd/system/exim4.service.d/backup.conf ]]; then
  regex="socket bind() to port 25 for address"
  # |& sends both stdout and stderr of grep through tee, which appends to the
  # archive while still echoing the lines.
  if grep "$regex" "$pl" |& tee -a "$pl-archive"; then
    v "above is from grep $regex"
    sed -i "/$regex/d" "$pl"
  fi
fi
# this is a strange message due to running as nonroot
# regex='exim user lost privilege for using -C option'
# sed -i "/$regex/d" $pl

# seems to randomly be caused by
# Starting exim4-base housekeeping, exim4-base.service
#
# Archive and then delete broken-pipe lines whose time field starts with
# 00:00:0 (the first ten seconds after midnight). v is defined outside this
# block.
# NOTE(review): the closing fi was not present in the fragmented text and is
# restored here as the minimal closer -- confirm no else branch was lost.
regex="^[^ ]* 00:00:0.* Failed writing transport results to pipe: Broken pipe$"
# |& sends both stdout and stderr of grep through tee, which appends to the
# archive while still echoing the lines.
if grep "$regex" "$pl" |& tee -a "$pl-archive"; then
  v "above is from grep $regex"
  sed -i "/$regex/d" "$pl"
fi
65 ### begin removing panic lines due to service restarts ###
# For each (service, regex) pair read by the outer loop, find the timestamps
# of panic-log lines matching regex and search that service's journal around
# those times for a start/restart message. Matching panic lines are archived
# and deleted further down when both found and wipe evaluate as true.
# NOTE(review): several lines of this section are not visible in this view:
# the bodies of some branches, an else, the closing fi/done keywords and the
# outer loop's input redirection. The two "service regex" looking lines just
# before the end marker are presumably that loop's input data -- confirm
# against the complete script.
# NOTE(review): d and v are defined outside this view; presumably debug and
# verbose print helpers -- confirm.
66 while read -r service regex
; do
# Inner loop: d1 and d2 are the first two fields (date and time) printed by
# the awk command on the done line for each $pl line matching regex.
70 while read -r d1 d2
; do
# Timestamp of this panic-log line in seconds since the epoch.
72 tmptime
=$
(date -d "$d1 $d2" +%s
)
73 # Checking the journal takes a second or two, so
74 # don't consider every matching line, just those > 20 seconds apart. We are
75 # testing the journal for 60 seconds after the message, so should be ok.
76 # It probably makes sense to even check for >59 seconds apart, using 20
77 # seconds to be conservative.
# NOTE(review): the bodies of the two branches below are not visible here;
# presumably they store tmptime in logtime -- confirm.
78 if [[ ! $logtime ]]; then
80 elif (( tmptime
> logtime
+ 20 )); then
# Journal search window in epoch seconds: logtime - 60 up to logtime + 60.
86 sec_min
=$
((logtime
- 60))
87 sec_max
=$
((logtime
+ 60))
# The same two bounds rendered by date for the journalctl -S and -U options.
88 jmin
="$(date -d @$sec_min "+%F
%H
:%M
:%S
")"
89 jmax
="$(date -d @$sec_max "+%F
%H
:%M
:%S
")"
# Extract the Description= value from the unit text printed by systemctl cat.
90 description
=$
(systemctl
cat $service |
sed -rn 's/^ *Description=(.*)/\1/p')
# Journal pattern to search for: a line beginning "Starting <Description>".
91 jrregex
="^Starting $description"
# For spamassassin, additionally accept a line beginning "spamd: restarting".
92 if [[ $service == spamassassin
]]; then
93 jrregex
+="\|^spamd: restarting"
95 d
"jrregex=$jrregex jmin=$jmin jmax=$jmax"
96 # the sed clears out the initial time and process+pid (the first five whitespace-separated fields)
97 if journalctl
-u $service -S "$jmin" -U "$jmax" \
98 |
sed -r 's/^([^[:space:]]*[[:space:]]+){5}//' |
grep "$jrregex" &>/dev
/null
; then
# Journal has a matching line inside the window.
99 v
"messages worth wiping in: journalctl -u $service -S '$jmin' -U '$jmax' | sed -r 's/^([^[:space:]]*[[:space:]]+){5}//' | grep '$jrregex':"
# Message for the case where the journal search found nothing.
# NOTE(review): the else keyword and whatever else this branch does are not
# visible in this view.
101 v
"PANIC: message not found via: journalctl -u $service -S '$jmin' -U '$jmax' | sed -r 's/^([^[:space:]]*[[:space:]]+){5}//' | grep '$jrregex'"
# Feed the inner loop the first two fields of every $pl line matching regex.
105 done < <(awk "/$regex/ "'{print $1,$2}' $pl)
# found and wipe are executed as commands here, so they are expected to hold
# command names such as true or false; they are assigned outside this view.
106 if $found && $wipe; then
# Pipe the matching lines through tee -a so they are appended to the archive,
# then delete them from the panic log with sed.
108 if grep -E "$regex" $pl |
& tee -a $pl-archive; then
109 v
"above is from grep -E $regex"
110 sed -ri "/$regex/d" $pl
114 clamav-daemon malware acl condition
115 spamassassin spam acl condition
117 ### end removing panic lines due to service restarts ###
120 ## begin broken pipe & write lock & general alert ##
# Pattern covering the two message types that are handled together below.
# NOTE(review): count and newlines are used in this section but assigned
# outside this view, and the bodies of several branches plus the closing
# fi keywords are not visible here -- confirm against the complete script.
121 regex
="Failed to get write lock\|Failed writing transport results to pipe: Broken pipe$"
# Walk the $pl lines matching regex; day and time are the first two fields and
# the rest of the line is discarded into _. The grep on the done line is
# followed by a no-op alternative so that a no-match exit status is ignored.
124 while read -r day
time _
; do
# Timestamp of this line in seconds since the epoch.
125 log_s
=$
(date -d "$day $time" +%s
)
# True when the line is less than 300 seconds (5 minutes) old.
127 if (( log_s
> EPOCHSECONDS
- 300 )); then
130 done < <(grep "$regex" $pl ||
:)
132 # I see broken pipe in groups of 3 for the same message around once a day
133 # randomly. I'm guessing they are related to running 2 instances of
134 # exim which share the same spool. So, if we have some, but not in
135 # the last 5 minutes, and less than 20, it should be fine to clear
136 # them. write lock happens less but can fit under the same rule.
# More than 20: handled by a branch body that is not visible here. Otherwise,
# when newlines evaluates as false, archive the matching lines via tee -a and
# delete them from the panic log.
137 if (( count
> 20 )); then
139 elif ! $newlines; then
140 grep "$regex" $pl |
& tee -a $pl-archive
141 v
"above is from grep $regex"
142 sed -i "/$regex/d" $pl
146 # I think we could alert on anything else older than 61 seconds,
147 # but let's just add some slack, make it 2 minutes.
# Second pass: every $pl line that does NOT match regex (grep -v on the done
# line, again with a no-op alternative for the no-match case).
148 while read -r day
time _
; do
149 # some lines don't have dates, just skip them
150 # 2022-09-16 15:21:06.250 [438097] Exim configuration error:
151 # can't redefine an undefined macro "REMOTE_SMTP_SMARTHOST_TLS_VERIFY_HOSTS"
# The glob below accepts only a first field shaped like YYYY-MM-DD with a
# leading digit of 2-9.
152 if [[ $day != [2-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] ]]; then
# Timestamp of this line in seconds since the epoch.
155 log_s
=$
(date -d "$day $time" +%s
)
# True when the line is more than 120 seconds old.
156 if (( EPOCHSECONDS
- 120 > log_s
)); then
159 # pr_metric for $regex is handled above
160 done < <(grep -v "$regex" $pl ||
:)
161 ## end broken pipe ##
# Write the exim_paniclog metric line to the node-exporter .prom file,
# replacing any previous content. pr_metric is assigned outside this block.
echo "exim_paniclog $pr_metric" >/var/lib/prometheus/node-exporter/exim_paniclog.prom
# When the archive file fails the -w (writable) test, set its group to adm and
# its mode to 664 (owner and group read/write, others read-only).
# NOTE(review): a line between the test and chgrp, and the closing fi, are not
# visible in this view; the script re-executes itself as root earlier, so the
# -w test presumably fails mainly when the file does not exist yet -- confirm.
175 if [[ ! -w $pl-archive ]]; then
177 chgrp adm
$pl-archive
178 chmod 664 $pl-archive
181 if [[ $INVOCATION_ID ]]; then