minor fixes and improvements
[distro-setup] / epanic-clean
1 #!/bin/bash
2 # Copyright (C) 2019 Ian Kelling
3 # SPDX-License-Identifier: AGPL-3.0-or-later
4
# The panic log regularly gets some stuff in it we don't want to fix.
6 # Detect it and wipe it out.
7
# Refuse to continue under a non-bash shell. This test is deliberately
# written in plain POSIX syntax so that it also works in such a shell.
[ -n "$BASH_VERSION" ] || { echo "error: shell is not bash" >&2; exit 1; }

# inherit_errexit does not exist in bash < 4.4; ignore the failure there.
shopt -s inherit_errexit 2>/dev/null || true
set -o errexit -o errtrace -o pipefail
# Report the failing command, its location and its exit status on any error.
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR

# When not running as root, re-execute this script through sudo, keeping
# the environment and passing the original arguments along.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

# Any non-empty first argument turns on debug output (see d).
if [[ $1 ]]; then
  debug=true
else
  debug=false
fi
20
21
# Debug print: when $debug runs successfully (it is set to "true" or
# "false" by the script), write all arguments joined by spaces as one line
# on stdout; otherwise do nothing and return 0.
d() {
  $debug || return 0
  printf "%s\n" "$*"
}
27
28
# Path of the exim panic log. Removed entries are appended to "$pl-archive".
pl=/var/log/exim4/paniclog

# Make one cleaning pass over the panic log $pl.
#
# Three kinds of entries are considered; entries that are removed are first
# appended to "$pl-archive":
#   1. "Broken pipe" transport entries logged at 00:00:0x are always removed.
#   2. Remaining "Broken pipe" transport entries are removed when there are
#      at most 20 of them and none is from the last 5 minutes. With more
#      than 20, the whole panic log is printed instead.
#   3. "malware acl condition" / "spam acl condition" entries are removed
#      when, for every burst of such entries, the journal of the matching
#      service (clamav-daemon / spamassassin) shows it starting within 60
#      seconds of the burst.
#
# Globals:   pl (read), debug (read, through d)
# Arguments: none
# Outputs:   the panic log on stdout when there are more than 20 broken
#            pipe entries; debug lines through d
# Returns:   0 when the panic log is missing or empty
main() {
  # Everything is local so that no state leaks from one call to the next;
  # this function is called repeatedly when run from systemd.
  local regex now_s newlines count day time log_s
  local service found wipe d1 d2 tmptime logtime
  local sec_min sec_max jmin jmax description jrregex

  if [[ ! -s $pl ]]; then
    return 0
  fi

  # seems to randomly be caused by
  # Starting exim4-base housekeeping, exim4-base.service
  regex="^[^ ]* 00:00:0.* Failed writing transport results to pipe: Broken pipe$"
  grep "$regex" "$pl" >> "$pl-archive" ||:
  sed -i "/$regex/d" "$pl"

  ## begin broken pipe ##
  regex="Failed writing transport results to pipe: Broken pipe$"
  now_s=$(date +%s)
  newlines=false
  count=0
  while read -r day time _; do
    log_s=$(date -d "$day $time" +%s)
    count=$((count+1))
    # An entry counts as new when it was logged within the last 5 minutes.
    if (( log_s > now_s - 300 )); then
      newlines=true
    fi
  done < <(grep "$regex" "$pl" ||:)
  if (( count )); then
    # i see these in groups of 3 for the same message around once a day
    # randomly. I'm guessing they are related to running 2 instances of
    # exim which share the same spool. So, if we have some, but not in
    # the last 5 minutes, and less than 20, it should be fine to clear
    # them.
    if (( count > 20 )); then
      cat "$pl"
    elif ! $newlines; then
      grep "$regex" "$pl" >> "$pl-archive"
      sed -i "/$regex/d" "$pl"
    fi
  fi
  ## end broken pipe ##

  # Each line of the here-document is: <systemd unit> <panic log regex>.
  while read -r service regex; do
    found=false
    wipe=true
    # Start every service with a clean slate. A timestamp left over from a
    # previous service would make the first matching line look like part
    # of an already verified burst and skip its journal check.
    logtime=
    jrregex=
    d "$service $regex"
    while read -r d1 d2; do
      d "$d1 $d2"
      found=true
      tmptime=$(date -d "$d1 $d2" +%s)
      # don't consider every matching line, just those in > 60 second intervals
      if [[ ! $logtime ]]; then
        logtime=$tmptime
      elif (( tmptime > logtime + 60 )); then
        logtime=$tmptime
      else
        continue
      fi
      sec_min=$((logtime - 60))
      sec_max=$((logtime + 60))
      jmin="$(date -d @$sec_min "+%F %H:%M:%S")"
      jmax="$(date -d @$sec_max "+%F %H:%M:%S")"
      # The journal pattern is the same for every line of a service, so it
      # is built once, and only when there is a line to check.
      if [[ ! $jrregex ]]; then
        description=$(systemctl cat "$service" | sed -rn 's/^ *Description=(.*)/\1/p')
        jrregex="^Starting $description"
        if [[ $service == spamassassin ]]; then
          jrregex+="\|^spamd: restarting"
        fi
      fi
      d "jrregex=$jrregex jmin=$jmin jmax=$jmax"
      # the sed clears out the initial time and process+pid
      if ! journalctl -u "$service" -S "$jmin" -U "$jmax" \
          | sed -r 's/^([^[:space:]]*[[:space:]]+){5}//' | grep "$jrregex" &>/dev/null; then
        # No service start near this burst: keep all entries for review.
        wipe=false
        break
      fi
    done < <(awk "/$regex/ "'{print $1,$2}' "$pl")
    if $found && $wipe; then
      d "wiping $regex"
      if [[ ! -w $pl-archive ]]; then
        touch "$pl-archive"
        chgrp adm "$pl-archive"
        chmod 664 "$pl-archive"
      fi
      grep -E "$regex" "$pl" >> "$pl-archive" ||:
      sed -ri "/$regex/d" "$pl"
    fi
  done <<'EOF'
clamav-daemon malware acl condition
spamassassin spam acl condition
EOF
}
116
# Without INVOCATION_ID in the environment, make a single pass. With it set
# (presumably because systemd started the script; confirm against the unit
# file), make 100 passes 30 seconds apart within one invocation.
if [[ ! $INVOCATION_ID ]]; then
  main
else
  # this is to prevent systemd from filling up the journal
  runcount=0
  while (( runcount < 100 )); do
    main
    sleep 30
    runcount=$((runcount + 1))
  done
fi