# Copyright (C) 2019 Ian Kelling
# SPDX-License-Identifier: AGPL-3.0-or-later
-# usage: runs 4 times every 15 seconds unless any args are passed, or we
-# are on battery power, then just runs once.
+# usage: runs once every 15 seconds. If any args are passed, it
+# just runs once and has verbose output. On battery power, it runs
+# once per minute.
if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi
+# Refuse to run as anyone but uid 1000. The diagnostic goes to stderr,
+# like the bash-version check above.
+if [[ $EUID != 1000 ]]; then
+  echo "$0: error, expected to be user 1000" >&2
+  exit 1
+fi
+
source /a/bin/errhandle/err
status_file=/dev/shm/iank-status
printf "%s\n" "$*"
fi
}
+# p ARGS...: print all arguments on one line, joined by the first
+# character of IFS (a space by default), followed by a newline.
+p() { printf "%s\n" "$*"; }
# log-once COUNT NAME [MESSAGE]
+# Passes all arguments to /usr/local/bin/log-once and pipes its output
+# through ifne to mail, addressed to root@localhost with NAME ($2) in the
+# subject. Callers in this file supply the message text on stdin, e.g.
+# p "$msg" | lo -1 name. When ifne is not installed, nothing is run at
+# all, not even log-once.
+# NOTE(review): presumably ifne (moreutils) only starts mail when
+# log-once printed something -- confirm.
lo() {
- /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost
+ if type -p ifne &>/dev/null; then
+ /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost
+ fi
+}
+
+# loday COUNT NAME: same pipeline as lo (log-once | ifne mail), but the
+# mail is addressed to daylert@iankelling.org instead of root@localhost.
+# Does nothing when ifne is not installed.
+loday() {
+ if type -p ifne &>/dev/null; then
+ /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylert@iankelling.org
+ fi
+}
+# rm glob
+# rmg [FILE...]: run rm -f on the given paths; when called with no
+# arguments, rm is not invoked at all.
+rmg() {
+ if (( $# )); then
+ rm -f "$@"
+ fi
}
+# todo, consider migrating some of these alerts into prometheus
write-status() {
chars=("${first_chars[@]}")
- # clock us out in timetrap if are idle too long
- if [[ -e /p/.timetrap.db ]]; then
- export DISPLAY=:0
- if type -p xprintidle &>/dev/null && xidle=$(xprintidle 2>/dev/null); then
- if [[ $xidle == [0-9]* ]]; then
- sheet=$(sqlite3 /p/.timetrap.db "select sheet from entries where end is NULL;")
- idle=300000
- if [[ $sheet == w ]]; then
- idle=900000
- fi
- if [[ $sheet && $xidle -gt $idle ]]; then
- timetrap out
- fi
+ services=( epanicclean )
+ case $HOSTNAME in
+ bk|je|li) : ;;
+ *)
+ services+=(
+ systemstatus
+ btrfsmaintstop
+ dynamicipupdate
+ )
+ bads=()
+ if systemctl show -p SubState --value ${services[@]} | grep -E -v '^(running|)$' &>/dev/null; then
+ for s in ${services[@]}; do
+ if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then
+ bads+=($s)
+ fi
+ done
+ chars+=(MYSERS)
fi
- fi
+ p ${bads[*]} | lo -240 mysers
+ ;;
+ esac
+
+ case $HOSTNAME in
+ kd)
+ services=(
+ prometheus-node-exporter
+ prometheus-alertmanager
+ prometheus
+ )
+ bads=()
+ if systemctl show -p SubState --value ${services[@]} | grep -E -v '^(running|)$' &>/dev/null; then
+ for s in ${services[@]}; do
+ if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then
+ bads+=($s)
+ fi
+ done
+ chars+=(PROM)
+ fi
+ p ${bads[*]} | lo -240 prom
+ ;;
+ esac
+
+
+ # this section copied from servicepid()
+ unit=exim4
+ pid=$(systemctl show --property MainPID --value $unit ||:)
+ case $pid in
+ [1-9]*) : ;;
+ *)
+ dir=/sys/fs/cgroup/system.slice
+ if [[ ! -d $dir ]]; then
+ dir=/sys/fs/cgroup/systemd/system.slice
+ fi;
+ pid=$(head -n1 $dir/${unit%.service}.service/cgroup.procs ||:)
+ ;;
+ esac
+ if [[ ! $pid ]]; then
+ chars+=(EXIM)
fi
+ if [[ -e /a/bin/bash_unpublished/source-state ]]; then
+ # /a gets remounted due to btrbk, ignore error code for file doesnt exist
+ source /a/bin/bash_unpublished/source-state || [[ $? == 1 ]]
+ fi
+
+
+ ## check if last snapshot was recent
+ old_snap_limit=$(( 3 * 60 * 60 ))
+ for vol in a o q; do
+ case $vol in
+ o) btrbk_root=/mnt/o/btrbk ;;
+ *) btrbk_root=/mnt/root/btrbk ;;
+ esac
+ # this section generally copied from btrbk scripts, but
+ # this part modified to speed things up by about half a second.
+ # I'm not sure if its quite as reliable, but it looks pretty safe.
+ # Profiled it using time and also adding to the top of the file:
+ # set -x
+ # PS4='+ $(date "+%2N") '
+ # allow failure in case there are no snapshots yet.
+ # shellcheck disable=SC2012
+ shopt -s nullglob
+ files=($btrbk_root/$vol.20*)
+ shopt -u nullglob
+ if (( ! ${#files[@]} )); then
+ continue
+ fi
+ snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : ))
+ now=$EPOCHSECONDS
+ maxtime=0
+ for s in ${snaps[@]}; do
+ file=${s##*/}
+ t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s)
+ if (( t > maxtime )); then
+ maxtime=$t
+ fi
+ done
+ snapshotmsg=
+ last_snap_age=$(( now - maxtime ))
+ last_snap_hours=$(( last_snap_age / 60 / 60 ))
+ if (( last_snap_age > old_snap_limit )); then
+ chars+=(OLD-SNAP-${last_snap_hours}h)
+ snapshotmsg="/$vol snapshot older than 4 hours"
+ if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
+ p "$snapshotmsg" | lo -1 old-snapshot
+ fi
+ # not bothering to get info on all volumes if we find an old one.
+ break
+ fi
+ done
+
+
+ if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
+
+ bouncemsg=
+ glob=(/m/md/bounces/new/*)
+ if [[ -e ${glob[0]} ]]; then
+ chars+=(BOUNCE)
+ bouncemsg="message in /m/md/bounces/new"
+ fi
+ p $bouncemsg | loday -1 bounce
+ # emails without the S (seen) flag. this only checks the last flag,
+ # but its good enough for me.
+ glob=(/m/md/alerts/{new,cur}/!(*,S))
+ if [[ -e ${glob[0]} ]]; then
+ chars+=(A)
+ fi
+
+ glob=(/m/md/daylert/{new,cur}/!(*,S))
+ if [[ -e ${glob[0]} ]]; then
+ chars+=(DAY)
+ fi
+
+ bbkmsg=
+ if [[ $(systemctl is-active btrbk.timer) != active ]]; then
+ chars+=(BTRBK.TIMER)
+ bbkmsg="not enabled"
+ fi
+ p "$bbkmsg" | lo -480 btrbk.timer
+
+
+
+ # commented out, only using timetrap retrospectively.
+ # # clock us out in timetrap if are idle too long
+ # if [[ -e /p/.timetrap.db ]]; then
+ # export DISPLAY=:0
+ # if type -p xprintidle &>/dev/null && xidle=$(xprintidle 2>/dev/null); then
+ # if [[ $xidle == [0-9]* ]]; then
+ # sheet=$(sqlite3 /p/.timetrap.db "select sheet from entries where end is NULL;")
+ # idle=300000
+ # if [[ $sheet == w ]]; then
+ # idle=900000
+ # fi
+ # if [[ $sheet && $xidle -gt $idle ]]; then
+ # timetrap out
+ # fi
+ # fi
+ # fi
+ # fi
+ else # end if $MAIL_HOST
+ rmg /home/iank/cron-errors/bounce* \
+ /home/iank/cron-errors/btrbk.timer* \
+ /home/iank/cron-errors/old-snapshot*
+ fi
+
if ip l show tunfsf &>/dev/null; then
# this is for tracking dns over tls issue, which
# fixvpndns() in brc2 fixes.
stat=$(resolvectl dnsovertls tunfsf 2>/dev/null ||: )
- read _ _ _ istls <<<"$stat"
+ read -r _ _ _ istls <<<"$stat"
case $istls in
no) : ;;
*)
esac
fi
-
- if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then
- emacsfiles="$(emacsclient --eval "$(cat /usr/local/bin/unsaved-buffers.el)"| sed '/^"nil"$/d;s/^"(/E: /;s/)"$//')"
- if [[ $emacsfiles ]]; then
- chars+=("$emacsfiles")
+ # We do this once every 5 minutes, since this is not a grave problem.
+ # For formatted elisp, see /b/ds/unsaved-buffers.el
+ elisp='(format "%s" (-reduce-from (lambda (acc buf) (let ((bpath (buffer-file-name buf))) (if (and bpath (buffer-modified-p buf)) (cons bpath acc ) acc))) nil (buffer-list)))'
+ if [[ ! $last_emacs_check || $emacsfiles ]] || (( last_emacs_check < EPOCHSECONDS - 300 )); then
+ if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then
+ # i dun care if this fails
+ emacsfiles="$(timeout 1 emacsclient -a /usr/bin/true --eval "$elisp" 2>/dev/null | sed '/^"nil"$/d;s/^"(/E: /;s/)"$//' ||:)"
+ if [[ $emacsfiles ]]; then
+ chars+=("$emacsfiles")
+ fi
fi
+ last_emacs_check=$EPOCHSECONDS
fi
+
glob=(/nocow/btrfs-stale/*)
if [[ -e ${glob[0]} ]]; then
- chars+=("STALE")
+ chars+=(STALE)
fi
+ var_mail_msg=
if [[ $(find /var/mail -type f \! -empty -print -quit) ]]; then
var_mail_msg="message in /var/mail"
fi
- lo -1 var_mail $var_mail_msg
- glob=(/m/md/bounces/new/*)
- if [[ -e ${glob[0]} ]]; then
- chars+=("BOUNCE")
- bouncemsg="message in /m/md/bounces/new"
- fi
- lo -1 bounce $bouncemsg
- # emails without the S (seen) flag. this only checks the last flag,
- # but its good enough for me.
- glob=(/m/md/alerts/{new,cur}/!(*,S))
- if [[ -e ${glob[0]} ]]; then
- chars+=("A")
- fi
- tmp=(/var/local/cron-errors/mailtest-check*)
- if (( ${#tmp[@]} )); then
- chars+=("MAILPING")
- fi
- tmp=(/var/local/cron-errors/mailtest-slow*)
- if (( ${#tmp[@]} )); then
- chars+=("SPAMD")
- fi
+ p $var_mail_msg | loday -1 var_mail
- # early in install process, we dont have permission yet for exiqgrep
- qlen=$(/usr/sbin/exiqgrep -o 600 -c -b | awk '{print $1}') ||:
+ # early in install process, we dont have permission yet for exiqgrep.
+ # 1100 helps allow for system restarts
+ qlen=$(/usr/sbin/exiqgrep -o 1100 -c -b | awk '{print $1}') ||:
+ qmsg=
if ((qlen)); then
qmsg="queue length $qlen"
chars+=("q $qlen")
case $HOSTNAME in
# No point in emailing about the mailq on a host where we don't
# check email.
- $MAIL_HOST|bk)
- lo -120 qlen $qmsg
+ $MAIL_HOST)
+ p $qmsg | loday -120 qlen
+ ;;
+ *)
+ rmg /home/iank/cron-errors/qlen*
;;
esac
begin=false
- if ! make -C /b/ds -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then
+
+ # todo: make this robust to the case of /a not being mounted
+ if ! make -C /b/ds -q ~/.local/distro-begin 2>/dev/null || [[ $(<~/.local/distro-begin) != 0 ]]; then
begin=true
fi
end=false
- if ! make -C /b/ds -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then
+ if ! make -C /b/ds -q ~/.local/distro-end 2>/dev/null || [[ $(<~/.local/distro-end) != 0 ]]; then
end=true
fi
# these conditions are so we dont have an overly verbose prompt
if $begin && $end; then
- chars+=("D")
+ chars+=(D)
elif $begin; then
- chars+=("DB")
+ chars+=(DB)
elif $end; then
- chars+=("DE")
+ chars+=(DE)
else
+ source /a/bin/ds/script-files
f=~/.local/conflink
# shellcheck disable=SC2043
for _ in 1; do
if [[ -e $f ]]; then
- now=$(date +%s)
+ now=$EPOCHSECONDS
fsec=$(stat -c%Y $f)
# the / 60 makes it 0-59 seconds less strict, +1 to help make sure we
# dont have any false positives.
done
# Just because i forget a lot, -mmin -NUM means files modified <= NUM minutes ago
- if (( fmin < 0 )) && [[ $(find ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then
+ if (( fmin < 0 )) && [[ $(find ${all_my_scripts[@]} ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then
v conflink newer filesystem files
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
fi
if (( $(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s) > fsec )); then
v conflink: newer files checked in to git
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
done < <(git ls-files -o --exclude-standard)
if [[ ${untracked[0]} && $(find "${untracked[@]}" -mmin $fminplus -type f -print -quit) ]]; then
v conflink: untracked in $d
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
done
fi
if [[ ! -e $f || $(<$f) != 0 ]]; then
v conflink: last run not found or failed
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
done
fi
-# if [[ $(grep -v "exim user lost privilege for using -C option" /var/log/exim4/paniclog 2>/dev/null ||:) ]]; then
+ # if [[ $(grep -v "exim user lost privilege for using -C option" /var/log/exim4/paniclog 2>/dev/null ||:) ]]; then
if [[ -s /var/log/exim4/paniclog ]]; then
chars+=("PANIC!")
# leave it up to epanic-clean to send email notification
fi
- source /a/bin/bash_unpublished/source-state
- if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
- bbkmsg=
- if [[ $(systemctl is-active btrbk.timer) != active ]]; then
- chars+=("BTRBK.TIMER")
- bbkmsg="btrbk.timer not enabled"
+ if [[ ! -e $status_file || -w $status_file ]]; then
+ if [[ -e /a/bin/bash_unpublished/source-state ]]; then
+ cat /a/bin/bash_unpublished/source-state >$status_file
fi
- lo -960 btrbk.timer $bbkmsg
- ## check if last snapshot was within an hour
- vol=o
- # this section generally copied from btrbk scripts, but
- # this part modified to speed things up by about half a second.
- # I'm not sure if its quite as reliable, but it looks pretty safe.
- # Profiled it using time and also adding to the top of the file:
- # set -x
- # PS4='+ $(date "+%2N") '
- # allow failure in case there are no snapshots yet.
- # shellcheck disable=SC2012
- shopt -u nullglob
- files=(/mnt/root/btrbk/$vol.20*)
- shopt -s nullglob
- snaps=()
- if (( ${#files[@]} )); then
- snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : ))
- fi
- now=$(date +%s)
- maxtime=0
- for s in ${snaps[@]}; do
- file=${s##*/}
- t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s)
- if (( t > maxtime )); then
- maxtime=$t
- fi
- done
- if (( maxtime < now - 4*60*60 )); then
- chars+=("OLD-SNAP")
- snapshotmsg="/o snapshot older than 4 hours"
+ if [[ ${chars[*]} ]]; then
+ echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file
fi
- lo -1 old-snapshot $snapshotmsg
fi
+}
- cat /a/bin/bash_unpublished/source-state >$status_file
-
- if [[ ${chars[*]} ]]; then
- echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file
+# This prevents me having to mute notifications when I'm going to bed.
+#
+# Mutes the default PulseAudio sink when the screen is locked late at
+# night or early in the morning, and unmutes it when the screen is
+# unlocked and /tmp/ianknap does not exist. Takes no arguments. Prints
+# the state it found (unmuted/muted) before changing it, for the log.
+mute() {
+  local locked lock_info midnight mdiff
+  export DISPLAY=:0
+  locked=false
+  # If xscreensaver-command fails, we treat the screen as locked.
+  if lock_info=$(xscreensaver-command -time); then
+    if [[ $lock_info != *non-blanked* ]]; then
+      locked=true
+    fi
+  else
+    locked=true
+  fi
+  midnight=$(date -d 00:00 +%s)
+  # Whole hours since local midnight. The thresholds below are hours of
+  # the day, so the raw difference in seconds must be divided down;
+  # comparing seconds against 6 and 21 made the test true at almost any
+  # time of day.
+  mdiff=$(( (EPOCHSECONDS - midnight) / 3600 ))
+  if $locked && (( mdiff < 6 || mdiff > 21 )); then
+    case $(pactl get-sink-mute @DEFAULT_SINK@ | awk '{print $2}') in
+      no)
+        # for log purposes
+        echo unmuted
+        pactl set-sink-mute @DEFAULT_SINK@ true
+        ;;
+    esac
+  fi
+  # NOTE(review): (mdiff > 6 || mdiff < 12) is true for every value, so
+  # this only gates on being unlocked and /tmp/ianknap not existing.
+  # Confirm whether a 6..12 window (&&) was intended; left unchanged.
+  if ! $locked && (( mdiff > 6 || mdiff < 12 )) && [[ ! -e /tmp/ianknap ]]; then
+    case $(pactl get-sink-mute @DEFAULT_SINK@ | awk '{print $2}') in
+      yes)
+        # for log purposes
+        echo muted
+        pactl set-sink-mute @DEFAULT_SINK@ false
+        ;;
+    esac
fi
-
}
+
# use this if we want to do something just once per minute
first_chars=()
-power=true
-if [[ -e /sys/class/power_supply/AC/online && $(</sys/class/power_supply/AC/online) == 0 ]]; then
- power=false
-fi
-
+# Generate the status file once immediately.
write-status
+# With any argument: print the status file and exit instead of entering
+# the periodic loop.
if [[ $1 ]]; then
cat $status_file
exit 0
fi
-if ! $power; then
- exit 0
-fi
+# Loop forever: sleep, then run write-status and mute. The sleep is 15
+# seconds normally and 60 seconds on battery. The sleep comes first
+# because write-status has already been run once before this is called.
+main-loop() {
+ while true; do
+ power=true
+ # Treated as on battery only when the AC/online file exists and reads 0.
+ if [[ -e /sys/class/power_supply/AC/online && $(</sys/class/power_supply/AC/online) == 0 ]]; then
+ power=false
+ fi
+ wait=15
+ if ! $power; then
+ # NOTE(review): bitcoinoff is defined outside this view; presumably it
+ # stops bitcoind while on battery -- confirm.
+ if systemctl -q is-active bitcoind; then
+ bitcoinoff
+ fi
+ wait=60
+ fi
-# about 15 minutes
-for ((i=1; i<=60; i++)); do
- sleep 15
- write-status
-done
+ sleep $wait
+ write-status
+ mute
+ done
+}
+
+# Ensure our long-running operation is a single line so we are not prone
+# to errors from this file being modified.
+main-loop