make exim and wireguard restart waiting for internet. untested
[distro-setup] / epanic-clean
1 #!/bin/bash
2 # Copyright (C) 2019 Ian Kelling
3 # SPDX-License-Identifier: AGPL-3.0-or-later
4
5 # The panic log regularly gets some stuff in it we don't want to fix.
6 # Detect it and wipe it out.
7
# Refuse to continue under any shell other than bash.
if [ -z "$BASH_VERSION" ]; then
  echo "error: shell is not bash" >&2
  exit 1
fi

# inherit_errexit does not exist in bash < 4.4; ignore the failure there.
shopt -s inherit_errexit 2>/dev/null ||:
set -e -E -o pipefail
# Report the failing command and its status on stderr whenever ERR fires.
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR

# When not running as uid 0, replace this process with a sudo invocation
# of the same script, keeping the environment and the arguments.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

# Any non-empty first argument switches debug output on.
if [[ $1 ]]; then
  debug=true
else
  debug=false
fi
20
21
# Debug print: write all arguments, joined by spaces, as one line on
# stdout. Does nothing (and returns 0) unless the command stored in the
# global $debug succeeds, i.e. unless debug=true.
d() {
  $debug || return 0
  printf "%s\n" "$*"
}
27
28
pl=/var/log/exim4/paniclog

#######################################
# One cleanup pass over the exim panic log. Lines recognized as harmless
# are appended to "$pl-archive" and deleted from "$pl".
# Globals:   pl (read); debug (read indirectly through d)
# Arguments: none
# Outputs:   the whole panic log on stdout when more than 20 broken
#            pipe / write lock lines are present; debug lines via d
# Returns:   0 immediately when the panic log is missing or empty,
#            otherwise the status of the last command run
#######################################
main() {
  local regex now_s recent count day time log_s
  local service found wipe d1 d2 tmptime logtime
  local sec_min sec_max jmin jmax description jrregex

  if [[ ! -s $pl ]]; then
    return 0
  fi

  # example line:
  # 2022-02-09 22:08:14.683 [59759] socket bind() to port 25 for address 10.8.0.28 failed: Cannot assign requested address: daemon abandoned
  # Only dropped when the backup.conf drop-in for exim4.service exists.
  if [[ -e /etc/systemd/system/exim4.service.d/backup.conf ]]; then
    regex="socket bind() to port 25 for address"
    grep "$regex" "$pl" >> "$pl-archive" ||:
    sed -i "/$regex/d" "$pl"
  fi

  # seems to randomly be caused by
  # Starting exim4-base housekeeping, exim4-base.service
  regex="^[^ ]* 00:00:0.* Failed writing transport results to pipe: Broken pipe$"
  grep "$regex" "$pl" >> "$pl-archive" ||:
  sed -i "/$regex/d" "$pl"

  ## begin broken pipe & write lock ##
  regex="Failed to get write lock\|Failed writing transport results to pipe: Broken pipe$"
  now_s=$(date +%s)
  recent=false
  count=0
  while read -r day time _; do
    log_s=$(date -d "$day $time" +%s)
    count=$((count+1))
    # A line logged within the last 5 minutes means the burst may
    # still be in progress.
    if (( log_s >= now_s - 300 )); then
      recent=true
    fi
  done < <(grep "$regex" "$pl" ||:)
  if (( count )); then
    # I see broken pipe in groups of 3 for the same message around once a day
    # randomly. I'm guessing they are related to running 2 instances of
    # exim which share the same spool. So, if we have some, but not in
    # the last 5 minutes, and less than 20, it should be fine to clear
    # them. write lock happens less but can fit under the same rule.
    if (( count > 20 )); then
      cat "$pl"
    elif ! $recent; then
      grep "$regex" "$pl" >> "$pl-archive"
      sed -i "/$regex/d" "$pl"
    fi
  fi
  ## end broken pipe ##

  # For each "service regex" pair below: panic log lines matching regex
  # are wiped only if every sampled line falls within 60 seconds of a
  # journal entry showing the service starting.
  while read -r service regex; do
    found=false
    wipe=true
    # Start each service with no reference time, so a timestamp left
    # over from the previous service (or a previous pass) cannot make
    # us skip the journal check for this one.
    logtime=
    d "$service $regex"
    while read -r d1 d2; do
      d "$d1 $d2"
      found=true
      tmptime=$(date -d "$d1 $d2" +%s)
      # don't consider every matching line, just those in > 60 second intervals
      if [[ ! $logtime ]]; then
        logtime=$tmptime
      elif (( tmptime > logtime + 60 )); then
        logtime=$tmptime
      else
        continue
      fi
      sec_min=$((logtime - 60))
      sec_max=$((logtime + 60))
      jmin="$(date -d "@$sec_min" "+%F %H:%M:%S")"
      jmax="$(date -d "@$sec_max" "+%F %H:%M:%S")"
      description=$(systemctl cat "$service" | sed -rn 's/^ *Description=(.*)/\1/p')
      jrregex="^Starting $description"
      if [[ $service == spamassassin ]]; then
        jrregex+="\|^spamd: restarting"
      fi
      d "jrregex=$jrregex jmin=$jmin jmax=$jmax"
      # the sed clears out the initial time and process+pid.
      # grep output is discarded rather than using grep -q: an early
      # exit of grep could fail the pipeline under pipefail.
      if ! journalctl -u "$service" -S "$jmin" -U "$jmax" \
          | sed -r 's/^([^[:space:]]*[[:space:]]+){5}//' \
          | grep "$jrregex" &>/dev/null; then
        wipe=false
        break
      fi
    done < <(awk "/$regex/ "'{print $1,$2}' "$pl")
    if $found && $wipe; then
      d "wiping $regex"
      if [[ ! -w $pl-archive ]]; then
        touch "$pl-archive"
        chgrp adm "$pl-archive"
        chmod 664 "$pl-archive"
      fi
      grep -E "$regex" "$pl" >> "$pl-archive" ||:
      sed -ri "/$regex/d" "$pl"
    fi
  done <<'EOF'
clamav-daemon malware acl condition
spamassassin spam acl condition
EOF
}
124
# Without INVOCATION_ID, do a single pass. With it set, do 100 passes
# in this one invocation, sleeping 30 seconds after each;
# this is to prevent systemd from filling up the journal.
if [[ -z $INVOCATION_ID ]]; then
  main
else
  runcount=0
  while (( runcount < 100 )); do
    main
    sleep 30
    runcount=$(( runcount + 1 ))
  done
fi