X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=system-status;h=fdf4758b5028a9f33ff03b91dfc0e9dedb22c77c;hb=06a56540b041accaaff795ad57664e368cb07a10;hp=12e0426c3d0519ff6d8ca2323490736d5c1458da;hpb=fa5deaee2e0182ddfc7b39eea7ee2acedb259ddf;p=distro-setup diff --git a/system-status b/system-status index 12e0426..fdf4758 100755 --- a/system-status +++ b/system-status @@ -3,7 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # usage: runs once every 15 seconds unless any args are passed, or we -# then just runs once. On battery power, run once per minute. +# then just runs once and have verbose output. On battery power, run +# once per minute. if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi @@ -46,6 +47,12 @@ loday() { /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylert@iankelling.org fi } +# rm glob +rmg() { + if (( $# )); then + rm -f "$@" + fi +} # todo, consider migrating some of these alerts into prometheus write-status() { @@ -61,7 +68,7 @@ write-status() { dynamicipupdate ) bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then + if systemctl show -p SubState --value ${services[@]} | grep -E -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then bads+=($s) @@ -81,7 +88,7 @@ write-status() { prometheus ) bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then + if systemctl show -p SubState --value ${services[@]} | grep -E -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then bads+=($s) @@ -94,10 +101,76 @@ write-status() { esac + # this section copied from servicepid() + unit=exim4 + pid=$(systemctl show --property MainPID --value $unit ||:) + case $pid in + [1-9]*) : ;; + *) + dir=/sys/fs/cgroup/system.slice + if [[ ! -d $dir ]]; then + dir=/sys/fs/cgroup/systemd/system.slice + fi; + pid=$(head -n1 $dir/${unit%.service}.service/cgroup.procs ||:) + ;; + esac + if [[ ! $pid ]]; then + chars+=(EXIM) + fi + + if [[ -e /a/bin/bash_unpublished/source-state ]]; then # /a gets remounted due to btrbk, ignore error code for file doesnt exist source /a/bin/bash_unpublished/source-state || [[ $? == 1 ]] fi + + + ## check if last snapshot was recent + old_snap_limit=$(( 3 * 60 * 60 )) + for vol in a o q; do + case $vol in + o) btrbk_root=/mnt/o/btrbk ;; + *) btrbk_root=/mnt/root/btrbk ;; + esac + # this section generally copied from btrbk scripts, but + # this part modified to speed things up by about half a second. + # I'm not sure if its quite as reliable, but it looks pretty safe. + # Profiled it using time and also adding to the top of the file: + # set -x + # PS4='+ $(date "+%2N") ' + # allow failure in case there are no snapshots yet. + # shellcheck disable=SC2012 + shopt -s nullglob + files=($btrbk_root/$vol.20*) + shopt -u nullglob + if (( ! ${#files[@]} )); then + continue + fi + snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )) + now=$EPOCHSECONDS + maxtime=0 + for s in ${snaps[@]}; do + file=${s##*/} + t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s) + if (( t > maxtime )); then + maxtime=$t + fi + done + snapshotmsg= + last_snap_age=$(( now - maxtime )) + last_snap_hours=$(( last_snap_age / 60 / 60 )) + if (( last_snap_age > old_snap_limit )); then + chars+=(OLD-SNAP-${last_snap_hours}h) + snapshotmsg="/$vol snapshot older than 4 hours" + if [[ $MAIL_HOST == "$HOSTNAME" ]]; then + p "$snapshotmsg" | lo -1 old-snapshot + fi + # not bothering to get info on all volumes if we find an old one. + break + fi + done + + if [[ $MAIL_HOST == "$HOSTNAME" ]]; then bouncemsg= @@ -126,38 +199,6 @@ write-status() { fi p "$bbkmsg" | lo -480 btrbk.timer - ## check if last snapshot was within an hour - vol=o - # this section generally copied from btrbk scripts, but - # this part modified to speed things up by about half a second. - # I'm not sure if its quite as reliable, but it looks pretty safe. - # Profiled it using time and also adding to the top of the file: - # set -x - # PS4='+ $(date "+%2N") ' - # allow failure in case there are no snapshots yet. - # shellcheck disable=SC2012 - shopt -u nullglob - files=(/mnt/root/btrbk/$vol.20*) - shopt -s nullglob - snaps=() - if (( ${#files[@]} )); then - snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )) - fi - now=$EPOCHSECONDS - maxtime=0 - for s in ${snaps[@]}; do - file=${s##*/} - t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s) - if (( t > maxtime )); then - maxtime=$t - fi - done - snapshotmsg= - if (( maxtime < now - 4*60*60 )); then - chars+=(OLD-SNAP) - snapshotmsg="/o snapshot older than 4 hours" - fi - p "$snapshotmsg" | lo -1 old-snapshot # commented out, only using timetrap retrospectively. @@ -177,14 +218,17 @@ write-status() { # fi # fi # fi - + else # end if $MAIL_HOST + rmg /home/iank/cron-errors/bounce* \ + /home/iank/cron-errors/btrbk.timer* \ + /home/iank/cron-errors/old-snapshot* fi if ip l show tunfsf &>/dev/null; then # this is for tracking dns over tls issue, which # fixvpndns() in brc2 fixes. stat=$(resolvectl dnsovertls tunfsf 2>/dev/null ||: ) - read _ _ _ istls <<<"$stat" + read -r _ _ _ istls <<<"$stat" case $istls in no) : ;; *) @@ -200,7 +244,7 @@ write-status() { if [[ ! $last_emacs_check || $emacsfiles ]] || (( last_emacs_check < EPOCHSECONDS - 300 )); then if pgrep -G iank -u iank -f 'emacs --daemon' &>/dev/null; then # i dun care if this fails - emacsfiles="$(timeout 1 emacsclient --eval "$elisp"| sed '/^"nil"$/d;s/^"(/E: /;s/)"$//' ||:)" + emacsfiles="$(timeout 1 emacsclient -a /usr/bin/true --eval "$elisp" 2>/dev/null | sed '/^"nil"$/d;s/^"(/E: /;s/)"$//' ||:)" if [[ $emacsfiles ]]; then chars+=("$emacsfiles") fi @@ -219,23 +263,35 @@ write-status() { fi p $var_mail_msg | loday -1 var_mail - - tmp=(/var/local/cron-errors/mailtest-check*) - if (( ${#tmp[@]} )); then - chars+=(MAILPING) - fi - tmp=(/var/local/cron-errors/mailtest-slow*) - if (( ${#tmp[@]} )); then - chars+=(SPAMD) - fi - - # early in install process, we dont have permission yet for exiqgrep. - # 1100 helps allow for system restarts - qlen=$(/usr/sbin/exiqgrep -o 1100 -c -b | awk '{print $1}') ||: + # Note, early in install process, we dont have permission yet for exiqgrep. + # + # todo: don't do this every 15 seconds, more like once every 2 minutes to + # save cpu cycles. + # + # 2400 = 40 mins. This should allow for system restarts, and + # 30 minute message delay plus 10 minute queu runs. + qlen=$(/usr/sbin/exiqgrep -o 2400 -c -b | awk '{print $1}') ||: qmsg= if ((qlen)); then - qmsg="queue length $qlen" - chars+=("q $qlen") + # Do sending of long delayed messages, and dont count them in our queue warnings. + for mid in $(exiqgrep -o 2400 -zi); do + if exim -Mvh $mid | awk 'tolower($2) == "fdate:"' | grep -q .; then + qlen=$(( qlen - 1 )) + if (( $(date -d "$(exim -Mset $mid -be <<<'$h_date:' | sed -n 's/^> *//;/./p')" +%s) < EPOCHSECONDS )); then + if ip a show veth0-mail &>/dev/null; then + pid=$(pgrep -f "/usr/sbin/exim4 -bd -q30m -C /etc/exim4/my.conf"|head -n1); + nsenter -t $pid -n -m /usr/sbin/exim4 -C /etc/exim4/my.conf -M $mid + else + /usr/sbin/exim4 -M $mid + fi + fi + fi + done + + if ((qlen)); then + qmsg="queue length $qlen" + chars+=("q $qlen") + fi fi case $HOSTNAME in # No point in emailing about the mailq on a host where we don't @@ -243,10 +299,14 @@ write-status() { $MAIL_HOST) p $qmsg | loday -120 qlen ;; + *) + rmg /home/iank/cron-errors/qlen* + ;; esac begin=false + # todo: make this robust to the case of /a not being mounted if ! make -C /b/ds -q ~/.local/distro-begin 2>/dev/null || [[ $(<~/.local/distro-begin) != 0 ]]; then begin=true fi @@ -264,6 +324,7 @@ write-status() { elif $end; then chars+=(DE) else + source /a/bin/ds/script-files f=~/.local/conflink # shellcheck disable=SC2043 for _ in 1; do @@ -287,7 +348,7 @@ write-status() { done # Just because i forget a lot, -mmin -NUM means files modified <= NUM minutes ago - if (( fmin < 0 )) && [[ $(find ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then + if (( fmin < 0 )) && [[ $(find ${all_my_scripts[@]} ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then v conflink newer filesystem files chars+=(CONFLINK) break @@ -343,10 +404,44 @@ write-status() { fi fi } + +# This prevents me having to mute notifications when I'm going to bed. +mute() { + local locked + export DISPLAY=:0 + locked=false + if lock_info=$(xscreensaver-command -time); then + if [[ $lock_info != *non-blanked* ]]; then + locked=true + fi + else + locked=true + fi + midnight=$(date -d 00:00 +%s) + mdiff=$(( EPOCHSECONDS - midnight )) + if $locked && (( mdiff < 6 || mdiff > 21 )); then + case $(pactl get-sink-mute @DEFAULT_SINK@ | awk '{print $2}') in + no) + # for log purposes + echo unmuted + pactl set-sink-mute @DEFAULT_SINK@ true + ;; + esac + fi + if ! $locked && (( mdiff > 6 || mdiff < 12 )) && [[ ! -e /tmp/ianknap ]]; then + case $(pactl get-sink-mute @DEFAULT_SINK@ | awk '{print $2}') in + yes) + # for log purposes + echo muted + pactl set-sink-mute @DEFAULT_SINK@ false + ;; + esac + fi +} + # use this if we want to do something just once per minute first_chars=() - write-status if [[ $1 ]]; then cat $status_file @@ -361,11 +456,15 @@ main-loop() { fi wait=15 if ! $power; then + if systemctl -q is-active bitcoind; then + bitcoinoff + fi wait=60 fi sleep $wait write-status + mute done }