various fixes
[distro-setup] / system-status
#!/bin/bash
# Copyright (C) 2019 Ian Kelling
# SPDX-License-Identifier: AGPL-3.0-or-later

# Usage: system-status [ARG]
#
# Recomputes a set of host health indicators and writes them to
# $status_file.
#
# With no arguments and on AC power: writes the status 4 times in total,
# 15 seconds apart, then exits.
# With any non-empty argument: enables verbose output, writes the status
# once, prints the status file and exits.
# On battery power (AC adapter reports offline): writes the status once.

if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi

# Error handling helper; what it enables is defined in that file, not here.
source /a/bin/errhandle/err
status_file=/dev/shm/iank-status

# Unmatched globs expand to nothing, and globs also match dotfiles. The
# "is this directory non-empty" checks in write-status depend on both.
shopt -s nullglob
shopt -s dotglob

# Any non-empty first argument turns on verbose mode (see v below).
verbose=false
if [[ $1 ]]; then
  verbose=true
fi
# Print all arguments, joined by spaces, as one line on stdout, but only
# when verbose mode is on. Returns 0 either way.
# Globals:   verbose (read) - the string "true" or "false", run as a command.
# Arguments: words of the message.
v() {
  if $verbose; then
    printf "%s\n" "$*"
  fi
}
# Shorthand for the external log-once program; all arguments and stdin are
# passed through unchanged.
# NOTE(review): callers pass "-N NAME [message words...]" and omit the
# message when the condition is clear; log-once presumably reports a
# condition that persists -- confirm against log-once itself.
lo() { /usr/local/bin/log-once "$@"; }
26
# Recompute every status indicator and rewrite $status_file.
#
# Each check that finds a problem appends a short tag (STALE!, BOUNCE!,
# ALERT!, ...) to a list. At the end, $status_file is rewritten with the
# contents of source-state followed, if any tag was collected, by a line
# that prepends the tags to ps_char. Several checks also call lo (log-once)
# with a message while the problem is present and with no message when it
# is not.
#
# This function runs several times per process, so all per-run state is
# local and starts out empty: a message set by one run must not be
# reported again by a later run in which the condition has cleared.
#
# Globals:   first_chars, status_file, HOSTNAME (read); verbose (read, via
#            v); MAIL_HOST (read; presumably set by source-state -- TODO
#            confirm).
# Arguments: none
# Outputs:   rewrites $status_file; verbose notes on stdout.
write-status() {
  local -a chars glob all_dirs snaps untracked
  local var_mail_msg='' bouncemsg='' qmsg='' bbkmsg='' snapshotmsg=''
  local qlen begin end f now fsec fmin fminplus x d untracked_list
  local loglog spamcount vol s file t maxtime

  chars=("${first_chars[@]}")

  # With nullglob + dotglob, the array is non-empty exactly when the
  # directory contains something.
  glob=(/nocow/btrfs-stale/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("STALE!")
  fi

  if [[ $(find /var/mail -type f \! -empty -print -quit) ]]; then
    var_mail_msg="message in /var/mail"
  fi
  # Deliberately unquoted here and in the other lo calls below: an empty
  # message must expand to no argument at all.
  # shellcheck disable=SC2086
  lo -1 var_mail $var_mail_msg

  glob=(/m/md/bounces/new/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("BOUNCE!")
    bouncemsg="message in /m/md/bounces/new"
  fi
  # shellcheck disable=SC2086
  lo -1 bounce $bouncemsg

  glob=(/m/md/alerts/new/* /m/md/alerts/cur/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("ALERT!")
  fi
  if [[ -e /nocow/user/mailtest-failure ]]; then
    chars+=("MAILPING!")
  fi

  # First field of exiqgrep's count output for the exim queue.
  if ! qlen=$(/usr/sbin/exiqgrep -o 60 -c -b | awk '{print $1}'); then
    # early in install process, we dont have permission yet for exiqgrep
    qlen=$(sudo /usr/sbin/exiqgrep -o 60 -c -b | awk '{print $1}')
  fi
  if ((qlen)); then
    qmsg="queue length $qlen"
    chars+=("q $qlen")
  fi
  # NOTE(review): source-state is only sourced further down, so on the
  # first call in a process MAIL_HOST has whatever value the environment
  # gave it -- confirm that is intended.
  case $HOSTNAME in
    # No point in emailing about the mailq on a host where we don't
    # check email.
    "$MAIL_HOST" | l2)
      # shellcheck disable=SC2086
      lo -1 qlen $qmsg
      ;;
  esac

  # A distro stage needs attention when make says its stamp file is out of
  # date, or when the stamp file does not contain 0.
  begin=false
  if ! make -C /b/ds -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then
    begin=true
  fi

  end=false
  if ! make -C /b/ds -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then
    end=true
  fi

  # these conditions are so we dont have an overly verbose prompt
  if $begin && $end; then
    chars+=("DISTRO!")
  elif $begin; then
    chars+=("DISTRO-BEGIN!")
  elif $end; then
    chars+=("DISTRO-END!")
  else
    f=~/.local/conflink
    # Single-pass loop used as a breakable block: the first reason found
    # adds CONFLINK! exactly once and leaves the block.
    for _ in 1; do
      if [[ -e $f ]]; then
        now=$(date +%s)
        fsec=$(stat -c%Y "$f")
        # the / 60 makes it 0-59 seconds less strict, +1 to help make sure we
        # dont have any false positives.
        # fmin is zero or negative: minus the age of $f in whole minutes.
        fmin=$(( (fsec - now + 1 ) / 60 ))
        fminplus=$(( fmin + 60*24 ))
        # Filesystem files get copied, so find any newer than the last run.
        # The rest are heuristics:
        # - Is the last time a file was added in git newer than the last
        #   conflink run?
        # - Were files not yet added to git modified more recently than the
        #   last conflink run? For those, the modification time is pushed
        #   back by a day so they can be developed before being added to git.

        all_dirs=({/a/bin/ds,/p/c}{/filesystem,/machine_specific/"$HOSTNAME"/filesystem})
        # This part is copied from conflink
        for x in /p/c/machine_specific/*.hosts /a/bin/ds/machine_specific/*.hosts; do
          if grep -qxF "$HOSTNAME" "$x"; then all_dirs+=("${x%.hosts}"); fi
        done

        # Reminder: -mmin -NUM means modified less than NUM minutes ago;
        # a NUM without the minus sign means exactly NUM minutes ago, so
        # these checks only run when the value is negative.
        if (( fmin < 0 )) && [[ $(find "${all_dirs[@]}" -mmin "$fmin" -type f -print -quit 2>/dev/null) ]]; then
          v conflink newer filesystem files
          chars+=("CONFLINK!")
          break
        fi

        for d in /a/bin/distro-setup /p/c; do
          cd "$d"
          if [[ ! -e .git ]]; then
            # some hosts i dont push all of /p/c
            continue
          fi
          if (( $(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s) > fsec )); then
            v conflink: newer files checked in to git
            chars+=("CONFLINK!")
            cd /
            break 2
          fi

          # When the last conflink run is less than a day old, no untracked
          # file can qualify, so skip the lookup entirely.
          if (( fminplus < 0 )); then
            untracked_list=$(git ls-files -o --exclude-standard)
            if [[ $untracked_list ]]; then
              # One path per line; keeps names containing spaces intact.
              mapfile -t untracked <<<"$untracked_list"
              if [[ $(find "${untracked[@]}" -mmin "$fminplus" -type f -print -quit) ]]; then
                v conflink: untracked in "$d"
                chars+=("CONFLINK!")
                cd /
                break 2
              fi
            fi
          fi
        done
        cd /

      fi
      if [[ ! -e $f || $(<"$f") != 0 ]]; then
        v conflink: last run not found or failed
        chars+=("CONFLINK!")
        break
      fi
    done
  fi


  ## Clean the paniclog, but only a limited number of times per day, or
  ## else we should investigate. The per-day file holds one byte for each
  ## run that saw the spam acl entry; cleaning stops once it holds more
  ## than 4 bytes.
  loglog=/tmp/panicloglog-$(date --rfc-3339=date)
  if [[ -s $loglog ]]; then
    spamcount=$(stat -c%s "$loglog")
  else
    spamcount=0
  fi
  if (( spamcount <= 4 )); then
    if grep -q 'spam acl condition' /var/log/exim4/paniclog &>/dev/null; then
      printf . >>"$loglog"
    fi
    /a/bin/distro-setup/epanic-clean
  fi

  if [[ -s /var/log/exim4/paniclog ]]; then
    chars+=("PANIC!")
    tail -n 20 /var/log/exim4/paniclog | lo -1 paniclog
  else
    lo -1 paniclog
  fi

  source /a/bin/bash_unpublished/source-state
  if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
    if [[ $(systemctl is-active btrbk.timer) != active ]]; then
      chars+=("BTRBK.TIMER!")
      bbkmsg="btrbk.timer not enabled"
    fi
    # shellcheck disable=SC2086
    lo -60 btrbk.timer $bbkmsg

    ## check if last snapshot was within an hour
    vol=o
    # this section generally copied from btrbk scripts, but
    # this part modified to speed things up by about half a second.
    # I'm not sure if its quite as reliable, but it looks pretty safe.
    # Profiled it using time and also adding to the top of the file:
    # set -x
    # PS4='+ $(date "+%2N") '
    #
    # Only call ls when the glob matched: with nullglob an empty match
    # would leave ls -d without operands and it would list "." instead.
    glob=(/mnt/root/btrbk/"$vol".20*)
    snaps=()
    if (( ${#glob[@]} )); then
      # Reverse version sort, first line only. 141 is ls being killed by
      # SIGPIPE once head has exited, which is not an error here.
      snaps=($(ls -1avdr "${glob[@]}" | head -n1 || [[ $? == 141 ]]))
    fi
    now=$(date +%s)
    maxtime=0
    for s in "${snaps[@]}"; do
      file=${s##*/}
      # Insert separators into the compact timestamp after the volume
      # prefix so that date -d can parse it.
      t=$(date -d "$(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<"${file#"$vol".}")" +%s)
      if (( t > maxtime )); then
        maxtime=$t
      fi
    done
    if (( maxtime < now - 60*60 )); then
      chars+=("OLD-SNAP!")
      snapshotmsg="/o snapshot older than 1 hour"
    fi
    # shellcheck disable=SC2086
    lo -1 old-snapshot $snapshotmsg
  fi

  cat /a/bin/bash_unpublished/source-state >"$status_file"

  if [[ ${chars[*]} ]]; then
    printf '%s\n' "ps_char=\"${chars[*]} \$ps_char\"" >>"$status_file"
  fi

}
# Tags that every write-status run starts its list with. Put a tag here
# for anything that should be computed only once per invocation of this
# script rather than on every write-status run.
first_chars=()

# Treat the machine as on mains power unless the AC adapter exists and
# reports offline.
power=true
if [[ -e /sys/class/power_supply/AC/online && $(</sys/class/power_supply/AC/online) == 0 ]]; then
  power=false
fi

write-status
# Any non-empty argument: show the result and stop after the single run.
if [[ $1 ]]; then
  cat "$status_file"
  exit 0
fi

# On battery, one run is enough.
if ! $power; then
  exit 0
fi
# Three more runs, 15 seconds apart, for 4 in total.
for ((i=1; i<=3; i++)); do
  sleep 15
  write-status
done