minor bug fix
[distro-setup] / system-status
1 #!/bin/bash
2 # Copyright (C) 2019 Ian Kelling
3 # SPDX-License-Identifier: AGPL-3.0-or-later
4
5 # usage: runs 4 times every 15 seconds unless any args are passed, or we
6 # are on battery power, then just runs once.
7
# Refuse to continue under any shell other than bash. This test uses
# only POSIX syntax so it still works when a different shell runs us.
if [ -z "$BASH_VERSION" ]; then
  echo "error: shell is not bash" >&2
  exit 1
fi

source /a/bin/errhandle/err

# File that write-status regenerates on every pass.
status_file=/dev/shm/iank-status

# Unmatched globs expand to nothing, and globs also match dotfiles.
shopt -s nullglob dotglob

# Any non-empty first argument turns on verbose output.
if [[ -n $1 ]]; then
  verbose=true
else
  verbose=false
fi
# Print all arguments, joined into one line, but only in verbose mode.
# Globals: verbose (read) - the command name "true" or "false".
v() {
  $verbose || return 0
  printf "%s\n" "$*"
}
# Short alias for log-once; every argument is forwarded untouched.
lo() {
  /usr/local/bin/log-once "$@"
}
26
# Collect status indicators and regenerate $status_file.
#
# Globals read: first_chars, status_file, HOSTNAME, and MAIL_HOST (the
#   latter is read after sourcing source-state).
# Outputs: overwrites $status_file with a copy of source-state and, when
#   at least one indicator fired, appends a ps_char assignment listing
#   them. Messages are also passed to lo along the way.
# Side effects: changes the working directory; may append to the daily
#   /tmp/panicloglog-DATE file and run epanic-clean.
write-status() {
  local -a chars glob all_dirs untracked snaps snap_glob
  local qlen begin end f now fsec fmin fminplus x d untracked_list
  local loglog spamcount vol s file t maxtime
  # These must start empty on every call. The script calls this function
  # several times per run, and a message left over from an earlier pass
  # would otherwise still be passed to lo after its condition cleared.
  local bouncemsg= bbkmsg= snapshotmsg=

  chars=("${first_chars[@]}")

  glob=(/nocow/btrfs-stale/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("STALE!")
  fi
  glob=(/m/md/bounces/new/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("BOUNCE!")
    bouncemsg="message in /m/md/bounces/new"
  fi
  # Intentionally unquoted: an empty message contributes no arguments.
  lo -1 bounce $bouncemsg
  glob=(/m/md/alerts/new/* /m/md/alerts/cur/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("ALERT!")
  fi
  if [[ -e /nocow/user/mailtest-failure ]]; then
    chars+=("MAILPING!")
  fi

  if ! qlen=$(/usr/sbin/exiqgrep -o 60 -c -b | awk '{print $1}'); then
    # early in install process, we dont have permission yet for exiqgrep
    qlen=$(sudo /usr/sbin/exiqgrep -o 60 -c -b | awk '{print $1}')
  fi
  if ((qlen)); then
    chars+=("q $qlen")
  fi

  # A distro-begin / distro-end target counts as pending when make -q
  # reports it out of date or its file does not contain 0.
  begin=false
  cd /b/ds
  if ! make -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then
    begin=true
  fi

  end=false
  if ! make -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then
    end=true
  fi

  if $begin && $end; then
    chars+=("DISTRO!")
  elif $begin; then
    chars+=("DISTRO-BEGIN!")
  elif $end; then
    chars+=("DISTRO-END!")
  fi

  f=~/.local/conflink
  # Single-pass loop: it exists only so "break" can leave the conflink
  # checks as soon as one of them has added CONFLINK!.
  for _ in 1; do
    if [[ -e $f ]]; then
      now=$(date +%s)
      fsec=$(stat -c%Y "$f")
      # the / 60 makes it 0-59 seconds less strict, +1 makes sure we
      # dont have any false positives.
      # fmin is negative here (conflink mtime is in the past), so it is
      # used directly as the "-N" argument of find -mmin.
      fmin=$(( (fsec - now ) / 60 + 1 ))
      fminplus=$(( fmin + 60*24 ))
      # Filesystem files get copied, so find any newer than the last run.
      # The rest are heuristics:
      # Given the last time we added a file in git, is that newer than
      # the last conflink run.
      # Given new files not added to git, were they modified more
      # recently than the last conflink? but, push their modification
      # time back by a day so we can develop them before needing to add
      # them to git.

      all_dirs=({/a/bin/ds,/p/c}{/filesystem,/machine_specific/$HOSTNAME/filesystem})
      # This part is copied from conflink
      for x in /p/c/machine_specific/*.hosts /a/bin/ds/machine_specific/*.hosts; do
        if grep -qxF "$HOSTNAME" "$x"; then all_dirs+=("${x%.hosts}"); fi
      done

      # just because i forget a lot, -mmin -NUM means files modified
      # <= NUM minutes ago
      if (( $(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s) > fsec )) \
        || [[ $(find "${all_dirs[@]}" -mmin "$fmin" -type f -print -quit 2>/dev/null) ]]; then
        v conflink newer git or newer filesystem files
        chars+=("CONFLINK!")
        break
      fi

      for d in /a/bin/distro-setup /p/c; do
        cd "$d"
        if [[ ! -e .git ]]; then
          # some hosts i dont push all of /p/c
          continue
        fi
        untracked_list=$(git ls-files -o --exclude-standard)
        untracked=()
        if [[ $untracked_list ]]; then
          # One path per line, so names containing spaces stay intact.
          mapfile -t untracked <<<"$untracked_list"
        fi
        # Only a negative fminplus means "newer than": a non-negative
        # value would make find -mmin match an exact age instead, and
        # no file can be a day newer than a conflink run that is itself
        # less than a day old. The ./ prefix keeps a name that starts
        # with "-" from being parsed as a find expression.
        if (( ${#untracked[@]} && fminplus < 0 )) \
          && [[ $(find "${untracked[@]/#/./}" -mmin "$fminplus" -type f -print -quit) ]]; then
          v conflink: untracked in $d
          chars+=("CONFLINK!")
          # Leave both loops; a plain break would fall through to the
          # check below and could add CONFLINK! a second time.
          break 2
        fi
      done

    fi
    if [[ ! -e $f || $(<"$f") != 0 ]]; then
      v conflink: last run not found or failed
      chars+=("CONFLINK!")
      break
    fi
  done


  ## Clean the paniclog, but only up to 4 times per day, or else we
  ## should investigate.
  # The per-day file gains one byte each time the spam acl message is
  # seen, so its size is the count.
  loglog=/tmp/panicloglog-$(date --rfc-3339=date)
  if [[ -s $loglog ]]; then
    spamcount=$(stat -c%s "$loglog")
  else
    spamcount=0
  fi
  if (( spamcount <= 4 )); then
    if grep -q 'spam acl condition' /var/log/exim4/paniclog &>/dev/null; then
      printf . >>"$loglog"
    fi
    /a/bin/distro-setup/epanic-clean
  fi

  if [[ -s /var/log/exim4/paniclog ]]; then
    chars+=("PANIC!")
    tail -n 20 /var/log/exim4/paniclog | lo -1 paniclog
  else
    lo -1 paniclog
  fi

  source /a/bin/bash_unpublished/source-state
  if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
    if [[ $(systemctl is-active btrbk.timer) != active ]]; then
      chars+=("BTRBK.TIMER!")
      bbkmsg="btrbk.timer not enabled"
    fi
    lo -60 btrbk.timer $bbkmsg

    ## check if last snapshot was within an hour
    vol=o
    # this section generally copied from btrbk scripts, but
    # this part modified to speed things up by about half a second.
    # I'm not sure if its quite as reliable, but it looks pretty safe.
    # Profiled it using time and also adding to the top of the file:
    # set -x
    # PS4='+ $(date "+%2N") '
    #
    # Expand the glob first: with nullglob set, an unmatched pattern
    # would leave ls with no path argument and it would list "."
    # instead. With no snapshots, maxtime stays 0 and OLD-SNAP! is
    # reported.
    snap_glob=(/mnt/root/btrbk/$vol.20*)
    snaps=()
    if (( ${#snap_glob[@]} )); then
      snaps=($(ls -1avdr "${snap_glob[@]}" | head -n1))
    fi
    now=$(date +%s)
    maxtime=0
    for s in "${snaps[@]}"; do
      file=${s##*/}
      # The sed expression inserts "-" and ":" separators into the
      # timestamp part of the name before date parses it.
      t=$(date -d "$(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<"${file#$vol.}")" +%s)
      if (( t > maxtime )); then
        maxtime=$t
      fi
    done
    if (( maxtime < now - 60*60 )); then
      chars+=("OLD-SNAP!")
      snapshotmsg="/o snapshot older than 1 hour"
    fi
    lo -1 old-snapshot $snapshotmsg
  fi

  cat /a/bin/bash_unpublished/source-state >"$status_file"

  if [[ ${chars[*]} ]]; then
    echo "ps_char=\"${chars[*]} \$ps_char\"" >>"$status_file"
  fi

}
# Entries copied into chars at the start of every write-status call.
# Use this if we want to do something just once per minute.
first_chars=()

# Treat the machine as on AC power unless the AC supply file exists and
# contains 0.
power=true
if [[ -e /sys/class/power_supply/AC/online ]] \
  && [[ $(</sys/class/power_supply/AC/online) == 0 ]]; then
  power=false
fi

write-status

# With any argument: show the freshly written file and stop.
if [[ -n $1 ]]; then
  cat $status_file
  exit 0
fi

# Without AC power, the single pass above is all we do.
$power || exit 0

# Three more passes, 15 seconds apart.
for i in 1 2 3; do
  sleep 15
  write-status
done