X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=system-status;h=3ccde7c023e1fb06017a24b5a9c3d7a627d4cc6a;hb=56c55d8e02cdd6ec67d2fe53cc03785d8876442e;hp=07c730d71a7338db5a9343d359a857829a481cad;hpb=d7551546ac323c5d4b49370c885646bcf96e959f;p=distro-setup diff --git a/system-status b/system-status index 07c730d..3ccde7c 100755 --- a/system-status +++ b/system-status @@ -7,6 +7,11 @@ if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi +if [[ $EUID != 1000 ]]; then + echo "$0: error, expected to be user 1000" + exit 1 +fi + source /a/bin/errhandle/err status_file=/dev/shm/iank-status @@ -28,74 +33,152 @@ v() { printf "%s\n" "$*" fi } +p() { printf "%s\n" "$*"; } # log-once COUNT NAME [MESSAGE] lo() { - /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost + if type -p ifne &>/dev/null; then + /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost + fi } loday() { - /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylerts@iankelling.org + if type -p ifne &>/dev/null; then + /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylert@iankelling.org + fi } # todo, consider migrating some of these alerts into prometheus write-status() { chars=("${first_chars[@]}") - services=( - epanicclean - systemstatus - btrfsmaintstop - dynamicipupdate - ) - bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then - for s in ${services[@]}; do - if [[ $(systemctl show -p SubState --value $s) != running ]]; then - bads+=($s) + services=( epanicclean ) + case $HOSTNAME in + bk|je|li) : ;; + *) + services+=( + systemstatus + btrfsmaintstop + dynamicipupdate + ) + bads=() + if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then + for s in ${services[@]}; do + if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then + bads+=($s) + fi + done + chars+=(MYSERS) fi - done - chars+=(MYSERS) - - fi - lo -240 mysers ${bads[*]} + p ${bads[*]} | lo -240 mysers + ;; + esac - services=( - prometheus-node-exporter - prometheus-alertmanager - prometheus - ) case $HOSTNAME in kd) + services=( + prometheus-node-exporter + prometheus-alertmanager + prometheus + ) bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then + if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do - if [[ $(systemctl show -p SubState --value $s) != running ]]; then + if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then bads+=($s) fi done chars+=(PROM) fi - lo -240 prom ${bads[*]} + p ${bads[*]} | lo -240 prom ;; esac - # clock us out in timetrap if are idle too long - if [[ -e /p/.timetrap.db ]]; then - export DISPLAY=:0 - if type -p xprintidle &>/dev/null && xidle=$(xprintidle 2>/dev/null); then - if [[ $xidle == [0-9]* ]]; then - sheet=$(sqlite3 /p/.timetrap.db "select sheet from entries where end is NULL;") - idle=300000 - if [[ $sheet == w ]]; then - idle=900000 - fi - if [[ $sheet && $xidle -gt $idle ]]; then - timetrap out - fi + + if [[ -e /a/bin/bash_unpublished/source-state ]]; then + # /a gets remounted due to btrbk, ignore error code for file doesnt exist + source /a/bin/bash_unpublished/source-state || [[ $? == 1 ]] + fi + if [[ $MAIL_HOST == "$HOSTNAME" ]]; then + + bouncemsg= + glob=(/m/md/bounces/new/*) + if [[ -e ${glob[0]} ]]; then + chars+=(BOUNCE) + bouncemsg="message in /m/md/bounces/new" + fi + p $bouncemsg | loday -1 bounce + # emails without the S (seen) flag. this only checks the last flag, + # but its good enough for me. + glob=(/m/md/alerts/{new,cur}/!(*,S)) + if [[ -e ${glob[0]} ]]; then + chars+=(A) + fi + + glob=(/m/md/daylert/{new,cur}/!(*,S)) + if [[ -e ${glob[0]} ]]; then + chars+=(DAY) + fi + + bbkmsg= + if [[ $(systemctl is-active btrbk.timer) != active ]]; then + chars+=(BTRBK.TIMER) + bbkmsg="not enabled" + fi + p "$bbkmsg" | lo -480 btrbk.timer + + ## check if last snapshot was within an hour + vol=o + # this section generally copied from btrbk scripts, but + # this part modified to speed things up by about half a second. + # I'm not sure if its quite as reliable, but it looks pretty safe. + # Profiled it using time and also adding to the top of the file: + # set -x + # PS4='+ $(date "+%2N") ' + # allow failure in case there are no snapshots yet. + # shellcheck disable=SC2012 + shopt -u nullglob + files=(/mnt/root/btrbk/$vol.20*) + shopt -s nullglob + snaps=() + if (( ${#files[@]} )); then + snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )) + fi + now=$EPOCHSECONDS + maxtime=0 + for s in ${snaps[@]}; do + file=${s##*/} + t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s) + if (( t > maxtime )); then + maxtime=$t fi + done + snapshotmsg= + if (( maxtime < now - 4*60*60 )); then + chars+=(OLD-SNAP) + snapshotmsg="/o snapshot older than 4 hours" fi - fi + p "$snapshotmsg" | lo -1 old-snapshot + + + # commented out, only using timetrap retrospectively. + # # clock us out in timetrap if are idle too long + # if [[ -e /p/.timetrap.db ]]; then + # export DISPLAY=:0 + # if type -p xprintidle &>/dev/null && xidle=$(xprintidle 2>/dev/null); then + # if [[ $xidle == [0-9]* ]]; then + # sheet=$(sqlite3 /p/.timetrap.db "select sheet from entries where end is NULL;") + # idle=300000 + # if [[ $sheet == w ]]; then + # idle=900000 + # fi + # if [[ $sheet && $xidle -gt $idle ]]; then + # timetrap out + # fi + # fi + # fi + # fi + fi if ip l show tunfsf &>/dev/null; then # this is for tracking dns over tls issue, which @@ -111,14 +194,21 @@ write-status() { esac fi - - if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then - emacsfiles="$(emacsclient --eval "$(cat /usr/local/bin/unsaved-buffers.el)"| sed '/^"nil"$/d;s/^"(/E: /;s/)"$//')" - if [[ $emacsfiles ]]; then - chars+=("$emacsfiles") + # We do this once every 5 minutes, since this is not a grave problem. + # For formatted elisp, see /b/ds/unsaved-buffers.el + elisp='(format "%s" (-reduce-from (lambda (acc buf) (let ((bpath (buffer-file-name buf))) (if (and bpath (buffer-modified-p buf)) (cons bpath acc ) acc))) nil (buffer-list)))' + if [[ ! $last_emacs_check || $emacsfiles ]] || (( last_emacs_check < EPOCHSECONDS - 300 )); then + if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then + # i dun care if this fails + emacsfiles="$(timeout 1 emacsclient --eval "$elisp"| sed '/^"nil"$/d;s/^"(/E: /;s/)"$//' ||:)" + if [[ $emacsfiles ]]; then + chars+=("$emacsfiles") + fi fi + last_emacs_check=$EPOCHSECONDS fi + glob=(/nocow/btrfs-stale/*) if [[ -e ${glob[0]} ]]; then chars+=(STALE) @@ -127,36 +217,7 @@ write-status() { if [[ $(find /var/mail -type f \! -empty -print -quit) ]]; then var_mail_msg="message in /var/mail" fi - loday -1 var_mail $var_mail_msg - - bouncemsg= - glob=(/m/md/bounces/new/*) - if [[ -e ${glob[0]} ]]; then - chars+=(BOUNCE) - bouncemsg="message in /m/md/bounces/new" - fi - loday -1 bounce $bouncemsg - # emails without the S (seen) flag. this only checks the last flag, - # but its good enough for me. - glob=(/m/md/alerts/{new,cur}/!(*,S)) - if [[ -e ${glob[0]} ]]; then - chars+=(A) - fi - - glob=(/m/md/daylerts/{new,cur}/!(*,S)) - if [[ -e ${glob[0]} ]]; then - chars+=(DAY) - fi - - - tmp=(/var/local/cron-errors/mailtest-check*) - if (( ${#tmp[@]} )); then - chars+=(MAILPING) - fi - tmp=(/var/local/cron-errors/mailtest-slow*) - if (( ${#tmp[@]} )); then - chars+=(SPAMD) - fi + p $var_mail_msg | loday -1 var_mail # early in install process, we dont have permission yet for exiqgrep. # 1100 helps allow for system restarts @@ -169,18 +230,19 @@ write-status() { case $HOSTNAME in # No point in emailing about the mailq on a host where we don't # check email. - $MAIL_HOST|bk) - loday -120 qlen $qmsg + $MAIL_HOST) + p $qmsg | loday -120 qlen ;; esac begin=false - if ! make -C /b/ds -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then + + if ! make -C /b/ds -q ~/.local/distro-begin 2>/dev/null || [[ $(<~/.local/distro-begin) != 0 ]]; then begin=true fi end=false - if ! make -C /b/ds -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then + if ! make -C /b/ds -q ~/.local/distro-end 2>/dev/null || [[ $(<~/.local/distro-end) != 0 ]]; then end=true fi @@ -196,7 +258,7 @@ write-status() { # shellcheck disable=SC2043 for _ in 1; do if [[ -e $f ]]; then - now=$(date +%s) + now=$EPOCHSECONDS fsec=$(stat -c%Y $f) # the / 60 makes it 0-59 seconds less strict, +1 to help make sure we # dont have any false positives. @@ -261,55 +323,15 @@ write-status() { # leave it up to epanic-clean to send email notification fi - source /a/bin/bash_unpublished/source-state - if [[ $MAIL_HOST == "$HOSTNAME" ]]; then - bbkmsg= - if [[ $(systemctl is-active btrbk.timer) != active ]]; then - chars+=(BTRBK.TIMER) - bbkmsg="not enabled" + if [[ ! -e $status_file || -w $status_file ]]; then + if [[ -e /a/bin/bash_unpublished/source-state ]]; then + cat /a/bin/bash_unpublished/source-state >$status_file fi - lo -480 btrbk.timer $bbkmsg - ## check if last snapshot was within an hour - vol=o - # this section generally copied from btrbk scripts, but - # this part modified to speed things up by about half a second. - # I'm not sure if its quite as reliable, but it looks pretty safe. - # Profiled it using time and also adding to the top of the file: - # set -x - # PS4='+ $(date "+%2N") ' - # allow failure in case there are no snapshots yet. - # shellcheck disable=SC2012 - shopt -u nullglob - files=(/mnt/root/btrbk/$vol.20*) - shopt -s nullglob - snaps=() - if (( ${#files[@]} )); then - snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )) + if [[ ${chars[*]} ]]; then + echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file fi - now=$(date +%s) - maxtime=0 - for s in ${snaps[@]}; do - file=${s##*/} - t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s) - if (( t > maxtime )); then - maxtime=$t - fi - done - snapshotmsg= - if (( maxtime < now - 4*60*60 )); then - chars+=(OLD-SNAP) - snapshotmsg="/o snapshot older than 4 hours" - fi - lo -1 old-snapshot $snapshotmsg fi - - cat /a/bin/bash_unpublished/source-state >$status_file - - if [[ ${chars[*]} ]]; then - echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file - fi - } # use this if we want to do something just once per minute first_chars=()