2 # Copyright (C) 2019 Ian Kelling
3 # SPDX-License-Identifier: AGPL-3.0-or-later
5 # usage: runs once every 15 seconds unless any args are passed, in which
6 # case it just runs once. On battery power, run once per minute.
8 if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi
10 source /a
/bin
/errhandle
/err
11 status_file
=/dev
/shm
/iank-status
17 for p
in ~
/.gem
/ruby
/*/bin
; do
31 # log-once COUNT NAME [MESSAGE]
33 if type -p ifne
&>/dev
/null
; then
34 /usr
/local
/bin
/log-once
"$@" | ifne
mail -s "$HOSTNAME: system-status $2" root@localhost
39 if type -p ifne
&>/dev
/null
; then
40 /usr
/local
/bin
/log-once
"$@" | ifne
mail -s "$HOSTNAME: system-status $2" daylert@iankelling.org
44 # todo, consider migrating some of these alerts into prometheus
46 chars
=("${first_chars[@]}")
49 services
=( epanicclean
)
62 if systemctl show
-p SubState
--value ${services[@]} |
egrep -v '^(running|)$' &>/dev
/null
; then
63 for s
in ${services[@]}; do
64 if [[ $
(systemctl show
-p SubState
--value $s 2>&1) != running
]]; then
71 lo
-240 mysers
${bads[*]}
74 prometheus-node-exporter
75 prometheus-alertmanager
81 if systemctl show
-p SubState
--value ${services[@]} |
egrep -v '^(running|)$' &>/dev
/null
; then
82 for s
in ${services[@]}; do
83 if [[ $
(systemctl show
-p SubState
--value $s 2>&1) != running
]]; then
89 lo
-240 prom
${bads[*]}
93 # clock us out in timetrap if we are idle too long
94 if [[ -e /p
/.timetrap.db
]]; then
96 if type -p xprintidle
&>/dev
/null
&& xidle
=$
(xprintidle
2>/dev
/null
); then
97 if [[ $xidle == [0-9]* ]]; then
98 sheet
=$
(sqlite3
/p
/.timetrap.db
"select sheet from entries where end is NULL;")
100 if [[ $sheet == w
]]; then
103 if [[ $sheet && $xidle -gt $idle ]]; then
111 if ip l show tunfsf
&>/dev
/null
; then
112 # this is for tracking dns over tls issue, which
113 # fixvpndns() in brc2 fixes.
114 stat
=$
(resolvectl dnsovertls tunfsf
2>/dev
/null ||
: )
115 read _ _ _ istls
<<<"$stat"
119 printf "%s\n" "$istls" | ts
>> /tmp
/istls.log
126 if pgrep
-G iank
-u iank
-f 'emacs --daemon' &>/dev
/null
; then
127 emacsfiles
="$(emacsclient --eval "$
(cat /usr
/local
/bin
/unsaved-buffers.el
)"| sed '/^"nil
"$/d;s/^"(/E
: /;s
/)"$//')"
128 if [[ $emacsfiles ]]; then
129 chars
+=("$emacsfiles")
133 glob
=(/nocow
/btrfs-stale
/*)
134 if [[ -e ${glob[0]} ]]; then
138 if [[ $
(find /var
/mail -type f \
! -empty -print -quit) ]]; then
139 var_mail_msg
="message in /var/mail"
141 loday
-1 var_mail
$var_mail_msg
144 glob
=(/m
/md
/bounces
/new
/*)
145 if [[ -e ${glob[0]} ]]; then
147 bouncemsg
="message in /m/md/bounces/new"
149 loday
-1 bounce
$bouncemsg
150 # emails without the S (seen) flag. This only checks the last flag,
151 # but it's good enough for me.
152 glob
=(/m
/md
/alerts
/{new
,cur
}/!(*,S
))
153 if [[ -e ${glob[0]} ]]; then
157 glob
=(/m
/md
/daylert
/{new
,cur
}/!(*,S
))
158 if [[ -e ${glob[0]} ]]; then
163 tmp
=(/var
/local
/cron-errors
/mailtest-check
*)
164 if (( ${#tmp[@]} )); then
167 tmp
=(/var
/local
/cron-errors
/mailtest-slow
*)
168 if (( ${#tmp[@]} )); then
172 # early in the install process, we don't have permission yet for exiqgrep.
173 # 1100 helps allow for system restarts
174 qlen
=$
(/usr
/sbin
/exiqgrep
-o 1100 -c -b |
awk '{print $1}') ||
:
177 qmsg
="queue length $qlen"
181 # No point in emailing about the mailq on a host where we don't
184 loday
-120 qlen
$qmsg
190 if ! make -C /b
/ds
-q ~
/.local
/distro-begin
2>/dev
/null ||
[[ $
(<~
/.local
/distro-begin
) != 0 ]]; then
195 if ! make -C /b
/ds
-q ~
/.local
/distro-end
2>/dev
/null ||
[[ $
(<~
/.local
/distro-end
) != 0 ]]; then
199 # these conditions are so we don't have an overly verbose prompt
200 if $begin && $end; then
208 # shellcheck disable=SC2043
213 # the / 60 makes it 0-59 seconds less strict, +1 to help make sure we
214 # don't have any false positives.
215 fmin
=$
(( (fsec
- now
+ 1 ) / 60 ))
216 fminplus
=$
(( fmin
+ 60*24 ))
217 # Filesystem files get copied, so find any newer than the last run.
218 # The rest are heuristics:
219 # Given the last time we added a file in git, is that newer than the last conflink run?
220 # Given new files not added to git, were they modified more recently than the last conflink? but,
221 # push their modification time back by a day so we can develop them before needing to add them to git.
223 all_dirs
=({/a
/bin
/ds
,/p
/c
}{/filesystem
,/machine_specific
/$HOSTNAME/filesystem
})
224 # This part is copied from conflink
225 for x
in /p
/c
/machine_specific
/*.hosts
/a
/bin
/ds
/machine_specific
/*.hosts
; do
226 if grep -qxF $HOSTNAME $x; then all_dirs
+=( ${x%.hosts} ); fi
229 # Just because i forget a lot, -mmin -NUM means files modified <= NUM minutes ago
230 if (( fmin
< 0 )) && [[ $
(find ${all_dirs[@]} -mmin $fmin -type f
-print -quit 2>/dev
/null
) ]]; then
231 v conflink newer filesystem files
236 for d
in /a
/bin
/distro-setup
/p
/c
; do
237 [[ -d $d ]] ||
continue
239 if [[ ! -e .git
]]; then
240 # on some hosts I don't push all of /p/c
243 if (( $
(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s
) > fsec
)); then
244 v conflink
: newer files checked
in to git
252 done < <(git ls-files
-o --exclude-standard)
253 if [[ ${untracked[0]} && $
(find "${untracked[@]}" -mmin $fminplus -type f
-print -quit) ]]; then
254 v conflink
: untracked
in $d
262 if [[ ! -e $f || $
(<$f) != 0 ]]; then
263 v conflink
: last run not found or failed
270 # if [[ $(grep -v "exim user lost privilege for using -C option" /var/log/exim4/paniclog 2>/dev/null ||:) ]]; then
271 if [[ -s /var
/log
/exim
4/paniclog
]]; then
273 # leave it up to epanic-clean to send email notification
276 if [[ -e /a
/bin
/bash_unpublished
/source-state
]]; then
277 # /a gets remounted due to btrbk; ignore the error code for when the file doesn't exist
278 source /a
/bin
/bash_unpublished
/source-state ||
[[ $?
== 1 ]]
280 if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
282 if [[ $
(systemctl is-active btrbk.timer
) != active
]]; then
286 lo
-480 btrbk.timer
$bbkmsg
288 ## check if last snapshot was within an hour
290 # this section generally copied from btrbk scripts, but
291 # this part modified to speed things up by about half a second.
292 # I'm not sure if it's quite as reliable, but it looks pretty safe.
293 # Profiled it using time and also adding to the top of the file:
295 # PS4='+ $(date "+%2N") '
296 # allow failure in case there are no snapshots yet.
297 # shellcheck disable=SC2012
299 files
=(/mnt
/root
/btrbk
/$vol.20*)
302 if (( ${#files[@]} )); then
303 snaps
=($
(ls -1avdr "${files[@]}" 2>/dev
/null |
head -n1 ||
: ))
307 for s
in ${snaps[@]}; do
309 t
=$
(date -d $
(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s
)
310 if (( t
> maxtime
)); then
315 if (( maxtime
< now
- 4*60*60 )); then
317 snapshotmsg
="/o snapshot older than 4 hours"
319 lo
-1 old-snapshot
$snapshotmsg
322 if [[ ! -e $status_file ||
-w $status_file ]]; then
323 if [[ -e /a
/bin
/bash_unpublished
/source-state
]]; then
324 cat /a
/bin
/bash_unpublished
/source-state
>$status_file
327 if [[ ${chars[*]} ]]; then
328 echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file
333 # use this if we want to do something just once per minute
346 if [[ -e /sys
/class
/power_supply
/AC
/online
&& $
(</sys
/class
/power_supply
/AC
/online
) == 0 ]]; then
359 # ensure our long operations are one line so we are not prone to errors
360 # from this file being modified.