constantly firing timers cause systemd to think startup never finishes
[distro-setup] / system-status
1 #!/bin/bash
2 # Copyright (C) 2019 Ian Kelling
3 # SPDX-License-Identifier: AGPL-3.0-or-later
4
5 # usage: with no arguments, runs every 15 seconds (once per minute on
6 # battery power). If any arguments are passed, runs just once.
7
# Bail out early when run by a shell other than bash.
if [ -z "$BASH_VERSION" ]; then
  echo "error: shell is not bash" >&2
  exit 1
fi

source /a/bin/errhandle/err
status_file=/dev/shm/iank-status

# Unmatched globs expand to nothing, globs include dotfiles, and
# extended patterns are available.
shopt -s nullglob dotglob extglob

# Append every per-version ruby gem bin directory to PATH.
for p in ~/.gem/ruby/*/bin; do
  PATH+=":$p"
done


# Any non-empty first argument turns on verbose output.
case $1 in
  '') verbose=false ;;
  *) verbose=true ;;
esac
# v WORDS...
# Print the arguments joined by spaces on one line, but only when
# $verbose is true; otherwise do nothing and succeed.
v() {
  $verbose || return 0
  printf "%s\n" "$*"
}
# lo COUNT NAME [MESSAGE]
# Pass all arguments to log-once and, if it prints anything, mail that
# output to root@localhost with NAME in the subject.
lo() {
  local subject="$HOSTNAME: system-status $2"
  /usr/local/bin/log-once "$@" \
    | ifne mail -s "$subject" root@localhost
}
35
# loday COUNT NAME [MESSAGE]
# Same as lo, except the mail goes to the daylerts address.
loday() {
  local subject="$HOSTNAME: system-status $2"
  /usr/local/bin/log-once "$@" \
    | ifne mail -s "$subject" daylerts@iankelling.org
}
39
40
# write-status: work out which warning flags apply right now and rewrite
# $status_file with them.
#
# Globals read:   first_chars, status_file, HOSTNAME, MAIL_HOST
# Functions used: v, lo, loday
# Side effects:   may run "timetrap out", appends to /tmp/istls.log,
#                 may send mail through lo/loday, exports DISPLAY, and
#                 sources /a/bin/bash_unpublished/source-state into the
#                 running shell.
#
# Every scratch variable is local. This function is called repeatedly
# from the same shell, and a message variable left set by one call would
# otherwise be reported again by every later call.
write-status() {
  local -a chars=("${first_chars[@]}")
  local -a all_dirs untracked files glob tmp
  local var_mail_msg='' bouncemsg='' qmsg='' bbkmsg='' snapshotmsg=''
  local xidle sheet idle tls_status istls emacsfiles qlen begin end
  local f now fsec fmin fminplus x d l conflink_reason
  local vol newest file maxtime

  # Clock us out in timetrap if we are idle too long.
  if [[ -e /p/.timetrap.db ]]; then
    export DISPLAY=:0
    if type -p xprintidle &>/dev/null && xidle=$(xprintidle 2>/dev/null); then
      # Require a purely numeric value; anything else would break the
      # -gt comparison below.
      if [[ $xidle =~ ^[0-9]+$ ]]; then
        sheet=$(sqlite3 /p/.timetrap.db "select sheet from entries where end is NULL;")
        idle=300000
        if [[ $sheet == w ]]; then
          idle=900000
        fi
        if [[ $sheet && $xidle -gt $idle ]]; then
          timetrap out
        fi
      fi
    fi
  fi

  if ip l show tunfsf &>/dev/null; then
    # this is for tracking dns over tls issue, which
    # fixvpndns() in brc2 fixes.
    tls_status=$(resolvectl dnsovertls tunfsf 2>/dev/null ||: )
    # The fourth whitespace-separated word onward is the setting.
    read -r _ _ _ istls <<<"$tls_status"
    case $istls in
      no) : ;;
      *)
        printf "%s\n" "$istls" | ts >> /tmp/istls.log
        chars+=("T:$istls")
        ;;
    esac
  fi

  # Unsaved emacs buffers, as reported by the running emacs daemon.
  if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then
    emacsfiles="$(emacsclient --eval "$(</usr/local/bin/unsaved-buffers.el)" | sed '/^"nil"$/d;s/^"(/E: /;s/)"$//')"
    if [[ $emacsfiles ]]; then
      chars+=("$emacsfiles")
    fi
  fi

  glob=(/nocow/btrfs-stale/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("STALE")
  fi

  if [[ $(find /var/mail -type f \! -empty -print -quit) ]]; then
    var_mail_msg="message in /var/mail"
  fi
  # The message is left unquoted on purpose here and in the other
  # lo/loday calls: when it is empty it must disappear so that no
  # MESSAGE argument is passed.
  # shellcheck disable=SC2086
  loday -1 var_mail $var_mail_msg

  glob=(/m/md/bounces/new/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("BOUNCE")
    bouncemsg="message in /m/md/bounces/new"
  fi
  # shellcheck disable=SC2086
  loday -1 bounce $bouncemsg

  # emails without the S (seen) flag. this only checks the last flag,
  # but its good enough for me.
  for x in /m/md/alerts/{new,cur}/*; do
    if [[ $x != *,S && -e $x ]]; then
      chars+=("A")
      break
    fi
  done

  for x in /m/md/daylerts/{new,cur}/*; do
    if [[ $x != *,S && -e $x ]]; then
      chars+=("L")
      break
    fi
  done

  tmp=(/var/local/cron-errors/mailtest-check*)
  if (( ${#tmp[@]} )); then
    chars+=("MAILPING")
  fi
  tmp=(/var/local/cron-errors/mailtest-slow*)
  if (( ${#tmp[@]} )); then
    chars+=("SPAMD")
  fi

  # early in install process, we dont have permission yet for exiqgrep.
  # 1100 helps allow for system restarts
  qlen=$(/usr/sbin/exiqgrep -o 1100 -c -b | awk '{print $1}') ||:
  if ((qlen)); then
    qmsg="queue length $qlen"
    chars+=("q $qlen")
  fi

  # Sourced before the first use of MAIL_HOST below, so the value is
  # current even on the very first call.
  # NOTE(review): presumably this file defines MAIL_HOST - confirm.
  source /a/bin/bash_unpublished/source-state

  case $HOSTNAME in
    # No point in emailing about the mailq on a host where we don't
    # check email.
    "$MAIL_HOST"|bk)
      # shellcheck disable=SC2086
      loday -120 qlen $qmsg
      ;;
  esac

  begin=false
  if ! make -C /b/ds -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then
    begin=true
  fi

  end=false
  if ! make -C /b/ds -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then
    end=true
  fi

  # these conditions are so we dont have an overly verbose prompt
  if $begin && $end; then
    chars+=("D")
  elif $begin; then
    chars+=("DB")
  elif $end; then
    chars+=("DE")
  else
    # Decide whether conflink needs to be run again. The first reason
    # found wins, so the flag is added at most once.
    f=~/.local/conflink
    conflink_reason=
    if [[ -e $f ]]; then
      now=$(date +%s)
      fsec=$(stat -c%Y "$f")
      # the / 60 makes it 0-59 seconds less strict, +1 to help make sure we
      # dont have any false positives.
      fmin=$(( (fsec - now + 1 ) / 60 ))
      fminplus=$(( fmin + 60*24 ))
      # Filesystem files get copied, so find any newer than the last run.
      # The rest are heuristics:
      # Given the last time we added a file in git, is that newer than the
      # last conflink run.
      # Given new files not added to git, were they modified more recently
      # than the last conflink? but, push their modification time back by a
      # day so we can develop them before needing to add them to git.

      all_dirs=({/a/bin/ds,/p/c}{/filesystem,/machine_specific/"$HOSTNAME"/filesystem})
      # This part is copied from conflink
      for x in /p/c/machine_specific/*.hosts /a/bin/ds/machine_specific/*.hosts; do
        if grep -qxF -- "$HOSTNAME" "$x"; then
          all_dirs+=("${x%.hosts}")
        fi
      done

      # Just because i forget a lot, -mmin -NUM means files modified
      # less than NUM minutes ago. Only a negative value means "newer
      # than", hence the < 0 guards.
      if (( fmin < 0 )) && [[ $(find "${all_dirs[@]}" -mmin "$fmin" -type f -print -quit 2>/dev/null) ]]; then
        conflink_reason="conflink newer filesystem files"
      fi

      if [[ ! $conflink_reason ]]; then
        for d in /a/bin/distro-setup /p/c; do
          [[ -d $d ]] || continue
          cd "$d"
          if [[ ! -e .git ]]; then
            # some hosts i dont push all of /p/c
            continue
          fi
          if (( $(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s) > fsec )); then
            conflink_reason="conflink: newer files checked in to git"
            break
          fi

          untracked=()
          while IFS= read -r l; do
            untracked+=("$l")
          done < <(git ls-files -o --exclude-standard)
          # The ./ prefix keeps find from reading a path that starts
          # with "-" as an option.
          if (( fminplus < 0 )) \
            && [[ ${untracked[0]} && $(find "${untracked[@]/#/./}" -mmin "$fminplus" -type f -print -quit) ]]; then
            conflink_reason="conflink: untracked in $d"
            break
          fi
        done
        cd /
      fi
    fi
    if [[ ! $conflink_reason ]] && [[ ! -e $f || $(<"$f") != 0 ]]; then
      conflink_reason="conflink: last run not found or failed"
    fi
    if [[ $conflink_reason ]]; then
      v "$conflink_reason"
      chars+=("CONFLINK")
    fi
  fi

  # if [[ $(grep -v "exim user lost privilege for using -C option" /var/log/exim4/paniclog 2>/dev/null ||:) ]]; then
  if [[ -s /var/log/exim4/paniclog ]]; then
    chars+=("PANIC!")
    # leave it up to epanic-clean to send email notification
  fi

  if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
    if [[ $(systemctl is-active btrbk.timer) != active ]]; then
      chars+=("BTRBK.TIMER")
      bbkmsg="btrbk.timer not enabled"
    fi
    # shellcheck disable=SC2086
    lo -48 btrbk.timer $bbkmsg

    ## check that the newest snapshot is recent enough
    vol=o
    # this section generally copied from btrbk scripts, but
    # this part modified to speed things up by about half a second.
    # I'm not sure if its quite as reliable, but it looks pretty safe.
    # Profiled it using time and also adding to the top of the file:
    # set -x
    # PS4='+ $(date "+%2N") '
    files=("/mnt/root/btrbk/$vol".20*)
    newest=
    # The -e test is false both for an empty array and for an unmatched
    # literal pattern, so this works with nullglob on or off.
    if [[ -e ${files[0]} ]]; then
      # allow failure in case there are no snapshots yet.
      # shellcheck disable=SC2012
      newest=$(ls -1avdr "${files[@]}" 2>/dev/null | head -n1 || : )
    fi
    now=$(date +%s)
    maxtime=0
    if [[ $newest ]]; then
      file=${newest##*/}
      # Turn the timestamp part of the snapshot name into something
      # date -d can parse.
      maxtime=$(date -d "$(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<"${file#"$vol".}")" +%s)
    fi
    if (( maxtime < now - 4*60*60 )); then
      chars+=("OLD-SNAP")
      snapshotmsg="/o snapshot older than 4 hours"
    fi
    # shellcheck disable=SC2086
    lo -1 old-snapshot $snapshotmsg
  fi

  cat /a/bin/bash_unpublished/source-state >"$status_file"

  # NOTE(review): the flags are written inside double quotes without
  # escaping, so a flag containing a double quote, "$" or a backtick
  # (for example from an emacs buffer name) would be interpreted if this
  # file is sourced by a shell - confirm whether that matters.
  if [[ ${chars[*]} ]]; then
    printf '%s\n' "ps_char=\"${chars[*]} \$ps_char\"" >>"$status_file"
  fi

}
# Flags placed here are copied into the flag list at the start of every
# write-status call. Use this if we want to do something just once per
# minute.
first_chars=()


# The first status pass always runs immediately.
write-status

# With any argument: show what was written and stop here instead of
# entering the periodic loop.
case $1 in
  '') ;;
  *)
    cat $status_file
    exit 0
    ;;
esac
280
# main-loop: refresh the status forever, sleeping 15 seconds between
# passes, or 60 seconds when the AC adapter reports it is offline.
main-loop() {
  local ac_online=/sys/class/power_supply/AC/online
  local delay
  while :; do
    delay=15
    # A value of 0 in the AC "online" file means we are on battery power.
    if [[ -e $ac_online && $(<"$ac_online") == 0 ]]; then
      delay=60
    fi

    sleep "$delay"
    write-status
  done
}

# Ensure our long operations are started from one line, so we are not
# prone to errors from this file being modified.
main-loop