X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=system-status;h=07c730d71a7338db5a9343d359a857829a481cad;hb=d7551546ac323c5d4b49370c885646bcf96e959f;hp=2dd39ca7d99a90c88ccc9bddf7e51a49f37535cf;hpb=7d9ec600a5ed9f88b85e02a27ee017b85721a6ac;p=distro-setup diff --git a/system-status b/system-status old mode 100644 new mode 100755 index 2dd39ca..07c730d --- a/system-status +++ b/system-status @@ -2,8 +2,8 @@ # Copyright (C) 2019 Ian Kelling # SPDX-License-Identifier: AGPL-3.0-or-later -# usage: runs 4 times every 15 seconds unless any args are passed, or we -# are on battery power, then just runs once. +# usage: runs once every 15 seconds unless any args are passed, or we +# then just runs once. On battery power, run once per minute. if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi @@ -33,9 +33,52 @@ lo() { /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost } +loday() { + /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylerts@iankelling.org +} + +# todo, consider migrating some of these alerts into prometheus write-status() { chars=("${first_chars[@]}") + services=( + epanicclean + systemstatus + btrfsmaintstop + dynamicipupdate + ) + bads=() + if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then + for s in ${services[@]}; do + if [[ $(systemctl show -p SubState --value $s) != running ]]; then + bads+=($s) + fi + done + chars+=(MYSERS) + + fi + lo -240 mysers ${bads[*]} + + services=( + prometheus-node-exporter + prometheus-alertmanager + prometheus + ) + case $HOSTNAME in + kd) + bads=() + if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then + for s in ${services[@]}; do + if [[ $(systemctl show -p SubState --value $s) != running ]]; then + bads+=($s) + fi + done + chars+=(PROM) + fi + lo -240 prom ${bads[*]} + ;; + esac + # clock us out in timetrap if are idle too long if [[ -e /p/.timetrap.db ]]; then export DISPLAY=:0 @@ -53,8 +96,24 @@ write-status() { fi fi - if pgrep -f 'emacs --daemon' &>/dev/null; then - emacsfiles="$(emacsclient --eval "$(cat /a/bin/ds/unsaved-buffers.el)"| sed '/^"nil"$/d;s/^"(/E: /;s/)"$//')" + + if ip l show tunfsf &>/dev/null; then + # this is for tracking dns over tls issue, which + # fixvpndns() in brc2 fixes. + stat=$(resolvectl dnsovertls tunfsf 2>/dev/null ||: ) + read _ _ _ istls <<<"$stat" + case $istls in + no) : ;; + *) + printf "%s\n" "$istls" | ts >> /tmp/istls.log + chars+=("T:$istls") + ;; + esac + fi + + + if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then + emacsfiles="$(emacsclient --eval "$(cat /usr/local/bin/unsaved-buffers.el)"| sed '/^"nil"$/d;s/^"(/E: /;s/)"$//')" if [[ $emacsfiles ]]; then chars+=("$emacsfiles") fi @@ -62,35 +121,47 @@ write-status() { glob=(/nocow/btrfs-stale/*) if [[ -e ${glob[0]} ]]; then - chars+=("STALE") + chars+=(STALE) fi + var_mail_msg= if [[ $(find /var/mail -type f \! -empty -print -quit) ]]; then var_mail_msg="message in /var/mail" fi - lo -1 var_mail $var_mail_msg + loday -1 var_mail $var_mail_msg + + bouncemsg= glob=(/m/md/bounces/new/*) if [[ -e ${glob[0]} ]]; then - chars+=("BOUNCE") + chars+=(BOUNCE) bouncemsg="message in /m/md/bounces/new" fi - lo -1 bounce $bouncemsg + loday -1 bounce $bouncemsg # emails without the S (seen) flag. this only checks the last flag, # but its good enough for me. glob=(/m/md/alerts/{new,cur}/!(*,S)) if [[ -e ${glob[0]} ]]; then - chars+=("A") + chars+=(A) + fi + + glob=(/m/md/daylerts/{new,cur}/!(*,S)) + if [[ -e ${glob[0]} ]]; then + chars+=(DAY) fi + + tmp=(/var/local/cron-errors/mailtest-check*) if (( ${#tmp[@]} )); then - chars+=("MAILPING") + chars+=(MAILPING) fi tmp=(/var/local/cron-errors/mailtest-slow*) if (( ${#tmp[@]} )); then - chars+=("SPAMD") + chars+=(SPAMD) fi - # early in install process, we dont have permission yet for exiqgrep - qlen=$(/usr/sbin/exiqgrep -o 600 -c -b | awk '{print $1}') ||: + # early in install process, we dont have permission yet for exiqgrep. + # 1100 helps allow for system restarts + qlen=$(/usr/sbin/exiqgrep -o 1100 -c -b | awk '{print $1}') ||: + qmsg= if ((qlen)); then qmsg="queue length $qlen" chars+=("q $qlen") @@ -99,7 +170,7 @@ write-status() { # No point in emailing about the mailq on a host where we don't # check email. $MAIL_HOST|bk) - lo -120 qlen $qmsg + loday -120 qlen $qmsg ;; esac @@ -115,11 +186,11 @@ write-status() { # these conditions are so we dont have an overly verbose prompt if $begin && $end; then - chars+=("D") + chars+=(D) elif $begin; then - chars+=("DB") + chars+=(DB) elif $end; then - chars+=("DE") + chars+=(DE) else f=~/.local/conflink # shellcheck disable=SC2043 @@ -146,11 +217,12 @@ write-status() { # Just because i forget a lot, -mmin -NUM means files modified <= NUM minutes ago if (( fmin < 0 )) && [[ $(find ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then v conflink newer filesystem files - chars+=("CONFLINK") + chars+=(CONFLINK) break fi for d in /a/bin/distro-setup /p/c; do + [[ -d $d ]] || continue cd $d if [[ ! -e .git ]]; then # some hosts i dont push all of /p/c @@ -158,7 +230,7 @@ write-status() { fi if (( $(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s) > fsec )); then v conflink: newer files checked in to git - chars+=("CONFLINK") + chars+=(CONFLINK) break fi @@ -168,7 +240,7 @@ write-status() { done < <(git ls-files -o --exclude-standard) if [[ ${untracked[0]} && $(find "${untracked[@]}" -mmin $fminplus -type f -print -quit) ]]; then v conflink: untracked in $d - chars+=("CONFLINK") + chars+=(CONFLINK) break fi done @@ -177,12 +249,13 @@ write-status() { fi if [[ ! -e $f || $(<$f) != 0 ]]; then v conflink: last run not found or failed - chars+=("CONFLINK") + chars+=(CONFLINK) break fi done fi + # if [[ $(grep -v "exim user lost privilege for using -C option" /var/log/exim4/paniclog 2>/dev/null ||:) ]]; then if [[ -s /var/log/exim4/paniclog ]]; then chars+=("PANIC!") # leave it up to epanic-clean to send email notification @@ -192,10 +265,10 @@ write-status() { if [[ $MAIL_HOST == "$HOSTNAME" ]]; then bbkmsg= if [[ $(systemctl is-active btrbk.timer) != active ]]; then - chars+=("BTRBK.TIMER") - bbkmsg="btrbk.timer not enabled" + chars+=(BTRBK.TIMER) + bbkmsg="not enabled" fi - lo -960 btrbk.timer $bbkmsg + lo -480 btrbk.timer $bbkmsg ## check if last snapshot was within an hour vol=o @@ -223,8 +296,9 @@ write-status() { maxtime=$t fi done + snapshotmsg= if (( maxtime < now - 4*60*60 )); then - chars+=("OLD-SNAP") + chars+=(OLD-SNAP) snapshotmsg="/o snapshot older than 4 hours" fi lo -1 old-snapshot $snapshotmsg @@ -240,10 +314,6 @@ write-status() { # use this if we want to do something just once per minute first_chars=() -power=true -if [[ -e /sys/class/power_supply/AC/online && $(