# Copyright (C) 2019 Ian Kelling
# SPDX-License-Identifier: AGPL-3.0-or-later
-# usage: runs 4 times every 15 seconds unless any args are passed, or we
-# are on battery power, then just runs once.
+# usage: runs once every 15 seconds unless any args are passed, in which
+# case it just runs once. On battery power, runs once per minute.
if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi
/usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost
}
+# Pass all arguments through to log-once and, only if it prints something
+# (ifne runs its command only when stdin is non-empty), mail that output
+# to daylerts@iankelling.org. $2 is included in the mail subject.
+loday() {
+ /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylerts@iankelling.org
+}
+
+# todo, consider migrating some of these alerts into prometheus
write-status() {
chars=("${first_chars[@]}")
+ services=(
+ epanicclean
+ systemstatus
+ btrfsmaintstop
+ dynamicipupdate
+ )
+ bads=()
+ if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then
+ for s in ${services[@]}; do
+ if [[ $(systemctl show -p SubState --value $s) != running ]]; then
+ bads+=($s)
+ fi
+ done
+ chars+=(MYSERS)
+
+ fi
+ lo -240 mysers ${bads[*]}
+
+ services=(
+ prometheus-node-exporter
+ prometheus-alertmanager
+ prometheus
+ )
+ case $HOSTNAME in
+ kd)
+ bads=()
+ if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then
+ for s in ${services[@]}; do
+ if [[ $(systemctl show -p SubState --value $s) != running ]]; then
+ bads+=($s)
+ fi
+ done
+ chars+=(PROM)
+ fi
+ lo -240 prom ${bads[*]}
+ ;;
+ esac
+
# clock us out in timetrap if we are idle too long
if [[ -e /p/.timetrap.db ]]; then
export DISPLAY=:0
fi
fi
+
+ if ip l show tunfsf &>/dev/null; then
+ # this is for tracking dns over tls issue, which
+ # fixvpndns() in brc2 fixes.
+ stat=$(resolvectl dnsovertls tunfsf 2>/dev/null ||: )
+ read _ _ _ istls <<<"$stat"
+ case $istls in
+ no) : ;;
+ *)
+ printf "%s\n" "$istls" | ts >> /tmp/istls.log
+ chars+=("T:$istls")
+ ;;
+ esac
+ fi
+
+
+ if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then
+ emacsfiles="$(emacsclient --eval "$(cat /usr/local/bin/unsaved-buffers.el)"| sed '/^"nil"$/d;s/^"(/E: /;s/)"$//')"
+ if [[ $emacsfiles ]]; then
+ chars+=("$emacsfiles")
+ fi
+ fi
+
glob=(/nocow/btrfs-stale/*)
if [[ -e ${glob[0]} ]]; then
- chars+=("STALE")
+ chars+=(STALE)
fi
+ var_mail_msg=
if [[ $(find /var/mail -type f \! -empty -print -quit) ]]; then
var_mail_msg="message in /var/mail"
fi
- lo -1 var_mail $var_mail_msg
+ loday -1 var_mail $var_mail_msg
+
+ bouncemsg=
glob=(/m/md/bounces/new/*)
if [[ -e ${glob[0]} ]]; then
- chars+=("BOUNCE")
+ chars+=(BOUNCE)
bouncemsg="message in /m/md/bounces/new"
fi
- lo -1 bounce $bouncemsg
+ loday -1 bounce $bouncemsg
# emails without the S (seen) flag. This only checks the last flag,
# but it's good enough for me.
glob=(/m/md/alerts/{new,cur}/!(*,S))
if [[ -e ${glob[0]} ]]; then
- chars+=("A")
+ chars+=(A)
fi
+
+ glob=(/m/md/daylerts/{new,cur}/!(*,S))
+ if [[ -e ${glob[0]} ]]; then
+ chars+=(DAY)
+ fi
+
+
tmp=(/var/local/cron-errors/mailtest-check*)
if (( ${#tmp[@]} )); then
- chars+=("MAILPING")
+ chars+=(MAILPING)
fi
tmp=(/var/local/cron-errors/mailtest-slow*)
if (( ${#tmp[@]} )); then
- chars+=("SPAMD")
+ chars+=(SPAMD)
fi
- # early in install process, we dont have permission yet for exiqgrep
- qlen=$(/usr/sbin/exiqgrep -o 600 -c -b | awk '{print $1}') ||:
+ # early in install process, we don't have permission yet for exiqgrep.
+ # -o 1100 (message age in seconds) helps allow for system restarts
+ qlen=$(/usr/sbin/exiqgrep -o 1100 -c -b | awk '{print $1}') ||:
+ qmsg=
if ((qlen)); then
qmsg="queue length $qlen"
chars+=("q $qlen")
# No point in emailing about the mailq on a host where we don't
# check email.
$MAIL_HOST|bk)
- lo -120 qlen $qmsg
+ loday -120 qlen $qmsg
;;
esac
# these conditions are so we dont have an overly verbose prompt
if $begin && $end; then
- chars+=("D")
+ chars+=(D)
elif $begin; then
- chars+=("DB")
+ chars+=(DB)
elif $end; then
- chars+=("DE")
+ chars+=(DE)
else
f=~/.local/conflink
# shellcheck disable=SC2043
# Just because i forget a lot, -mmin -NUM means files modified <= NUM minutes ago
if (( fmin < 0 )) && [[ $(find ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then
v conflink newer filesystem files
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
for d in /a/bin/distro-setup /p/c; do
+ [[ -d $d ]] || continue
cd $d
if [[ ! -e .git ]]; then
# some hosts i dont push all of /p/c
fi
if (( $(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s) > fsec )); then
v conflink: newer files checked in to git
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
done < <(git ls-files -o --exclude-standard)
if [[ ${untracked[0]} && $(find "${untracked[@]}" -mmin $fminplus -type f -print -quit) ]]; then
v conflink: untracked in $d
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
done
fi
if [[ ! -e $f || $(<$f) != 0 ]]; then
v conflink: last run not found or failed
- chars+=("CONFLINK")
+ chars+=(CONFLINK)
break
fi
done
fi
+ # if [[ $(grep -v "exim user lost privilege for using -C option" /var/log/exim4/paniclog 2>/dev/null ||:) ]]; then
if [[ -s /var/log/exim4/paniclog ]]; then
chars+=("PANIC!")
# leave it up to epanic-clean to send email notification
if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
bbkmsg=
if [[ $(systemctl is-active btrbk.timer) != active ]]; then
- chars+=("BTRBK.TIMER")
- bbkmsg="btrbk.timer not enabled"
+ chars+=(BTRBK.TIMER)
+ bbkmsg="not enabled"
fi
- lo -960 btrbk.timer $bbkmsg
+ lo -480 btrbk.timer $bbkmsg
## check if last snapshot was within an hour
vol=o
maxtime=$t
fi
done
+ snapshotmsg=
if (( maxtime < now - 4*60*60 )); then
- chars+=("OLD-SNAP")
+ chars+=(OLD-SNAP)
snapshotmsg="/o snapshot older than 4 hours"
fi
lo -1 old-snapshot $snapshotmsg
# use this if we want to do something just once per minute
first_chars=()
-power=true
-if [[ -e /sys/class/power_supply/AC/online && $(</sys/class/power_supply/AC/online) == 0 ]]; then
- power=false
-fi
write-status
if [[ $1 ]]; then
exit 0
fi
-if ! $power; then
- exit 0
-fi
+# Loop forever: each iteration re-checks AC power, sleeps, then calls
+# write-status. The sleep is 15 seconds normally and 60 seconds when
+# /sys/class/power_supply/AC/online exists and reads 0 (on battery).
+main-loop() {
+ while true; do
+ # power is re-evaluated every pass; it defaults to true when the
+ # AC/online file does not exist.
+ power=true
+ if [[ -e /sys/class/power_supply/AC/online && $(</sys/class/power_supply/AC/online) == 0 ]]; then
+ power=false
+ fi
+ # seconds to sleep before the next write-status
+ wait=15
+ if ! $power; then
+ wait=60
+ fi
-# about 15 minutes
-for ((i=1; i<=60; i++)); do
- sleep 15
- write-status
-done
+ sleep $wait
+ write-status
+ done
+}
+
+# ensure our long-running operations are invoked from a single line so we
+# are not prone to errors from this file being modified while it is running.
+main-loop