X-Git-Url: https://iankelling.org/git/?p=distro-setup;a=blobdiff_plain;f=system-status;h=430f8061c1c6611eafea56f369211c46d8804f94;hp=cd603ca57e4f982e149107d2aef856ccffb46451;hb=HEAD;hpb=9ac513d1086f22a8dede2ebe3ca0236443bdc429 diff --git a/system-status b/system-status index cd603ca..d6269d9 100755 --- a/system-status +++ b/system-status @@ -1,6 +1,22 @@ #!/bin/bash -# Copyright (C) 2019 Ian Kelling -# SPDX-License-Identifier: AGPL-3.0-or-later + +# Basic system status on on Ian's computers +# Copyright (C) 2024 Ian Kelling + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# SPDX-License-Identifier: GPL-3.0-or-later # usage: runs once every 15 seconds unless any args are passed, or we # then just runs once and have verbose output. On battery power, run @@ -13,7 +29,7 @@ if [[ $EUID != 1000 ]]; then exit 1 fi -source /a/bin/errhandle/err +source /a/bin/bash-bear-trap/bash-bear status_file=/dev/shm/iank-status shopt -s nullglob @@ -68,7 +84,7 @@ write-status() { dynamicipupdate ) bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then + if systemctl show -p SubState --value ${services[@]} | grep -E -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then bads+=($s) @@ -88,7 +104,7 @@ write-status() { prometheus ) bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then + if systemctl show -p SubState --value ${services[@]} | grep -E -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then bads+=($s) @@ -127,31 +143,26 @@ write-status() { ## check if last snapshot was recent old_snap_limit=$(( 3 * 60 * 60 )) - for vol in a o q; do - case $vol in - o) btrbk_root=/mnt/o/btrbk ;; - *) btrbk_root=/mnt/root/btrbk ;; - esac - # this section generally copied from btrbk scripts, but - # this part modified to speed things up by about half a second. - # I'm not sure if its quite as reliable, but it looks pretty safe. - # Profiled it using time and also adding to the top of the file: - # set -x - # PS4='+ $(date "+%2N") ' - # allow failure in case there are no snapshots yet. - # shellcheck disable=SC2012 - shopt -u nullglob - files=($btrbk_root/$vol.20*) - shopt -s nullglob - snaps=() - if (( ${#files[@]} )); then - snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )) - fi + vol=o + btrbk_root=/mnt/o/btrbk + # this section generally copied from btrbk scripts, but + # this part modified to speed things up by about half a second. + # I'm not sure if its quite as reliable, but it looks pretty safe. + # Profiled it using time and also adding to the top of the file: + # set -x + # PS4='+ $(date "+%2N") ' + # allow failure in case there are no snapshots yet. + shopt -s nullglob + files=($btrbk_root/$vol.20*) + shopt -u nullglob + if (( ${#files[@]} )); then + # shellcheck disable=SC2012 # using ls version sort. not sure this is needed. + snaps=("$(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )") now=$EPOCHSECONDS maxtime=0 for s in ${snaps[@]}; do file=${s##*/} - t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s) + t=$(date -d "$(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#"$vol."})" +%s) if (( t > maxtime )); then maxtime=$t fi @@ -166,9 +177,8 @@ write-status() { p "$snapshotmsg" | lo -1 old-snapshot fi # not bothering to get info on all volumes if we find an old one. - break fi - done + fi if [[ $MAIL_HOST == "$HOSTNAME" ]]; then @@ -228,7 +238,7 @@ write-status() { # this is for tracking dns over tls issue, which # fixvpndns() in brc2 fixes. stat=$(resolvectl dnsovertls tunfsf 2>/dev/null ||: ) - read _ _ _ istls <<<"$stat" + read -r _ _ _ istls <<<"$stat" case $istls in no) : ;; *) @@ -263,13 +273,36 @@ write-status() { fi p $var_mail_msg | loday -1 var_mail - # early in install process, we dont have permission yet for exiqgrep. - # 1100 helps allow for system restarts - qlen=$(/usr/sbin/exiqgrep -o 1100 -c -b | awk '{print $1}') ||: + # Note, early in install process, we dont have permission yet for exiqgrep. + # + # todo: don't do this every 15 seconds, more like once every 2 minutes to + # save cpu cycles. + # + # 2400 = 40 mins. This should allow for system restarts, and + # 30 minute message delay plus 10 minute queu runs. + qlen=$(/usr/sbin/exiqgrep -o 2400 -c -b | awk '{print $1}') ||: qmsg= if ((qlen)); then - qmsg="queue length $qlen" - chars+=("q $qlen") + # Do sending of long delayed messages, and dont count them in our queue warnings. + for mid in $(exiqgrep -o 2400 -zi); do + if exim -Mvh $mid | awk 'tolower($2) == "fdate:"' | grep -q .; then + qlen=$(( qlen - 1 )) + # shellcheck disable=SC2016 # exim var, not a bash bar + if (( $(date -d "$(exim -Mset $mid -be <<<'$h_date:' | sed -n 's/^> *//;/./p')" +%s) < EPOCHSECONDS )); then + if ip a show veth0-mail &>/dev/null; then + pid=$(pgrep -f "/usr/sbin/exim4 -bd -q30m -C /etc/exim4/my.conf"|head -n1); + nsenter -t $pid -n -m /usr/sbin/exim4 -C /etc/exim4/my.conf -M $mid + else + /usr/sbin/exim4 -M $mid + fi + fi + fi + done + + if ((qlen)); then + qmsg="queue length $qlen" + chars+=("q $qlen") + fi fi case $HOSTNAME in # No point in emailing about the mailq on a host where we don't @@ -325,8 +358,10 @@ write-status() { if grep -qxF $HOSTNAME $x; then all_dirs+=( ${x%.hosts} ); fi done + script_files=("${my_service_scripts[@]}" "${my_bin_files[@]}" $my_lib_files) + # Just because i forget a lot, -mmin -NUM means files modified <= NUM minutes ago - if (( fmin < 0 )) && [[ $(find ${all_my_scripts[@]} ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then + if (( fmin < 0 )) && [[ $(find "${script_files[@]}" ${all_dirs[@]} -mmin $fmin -type f -print -quit 2>/dev/null) ]]; then v conflink newer filesystem files chars+=(CONFLINK) break @@ -372,6 +407,23 @@ write-status() { # leave it up to epanic-clean to send email notification fi + mprom=/var/lib/prometheus/node-exporter/mailtest-check.prom + if [[ -s $mprom ]]; then + if grep -qE 'mailtest_check_(unexpected|missing).*[^ ][^0]$' $mprom; then + chars+=("MTEST_SPAM") + fi + mtest_found=false + # shellcheck disable=SC2013 # these are words + for t in $(grep -E ^mailtest_check_last_usec $mprom | awk '{print $NF}'); do + if (( t + 60 * 20 < EPOCHSECONDS )); then + mtest_found=true + fi + done + if $mtest_found; then + chars+=("MTEST_AGE") + fi + fi + if [[ ! -e $status_file || -w $status_file ]]; then if [[ -e /a/bin/bash_unpublished/source-state ]]; then cat /a/bin/bash_unpublished/source-state >$status_file @@ -382,16 +434,51 @@ write-status() { fi fi } + +# This prevents me having to mute notifications when I'm going to bed. +mute() { + local locked + export DISPLAY=:0 + locked=false + if lock_info=$(xscreensaver-command -time); then + if [[ $lock_info != *non-blanked* ]]; then + locked=true + fi + else + locked=true + fi + midnight=$(date -d 00:00 +%s) + mdiff=$(( EPOCHSECONDS - midnight )) + if $locked && (( mdiff < 6 *60*60 || mdiff > 21 *60*60 )); then + case $(pactl get-sink-mute @DEFAULT_SINK@ | awk '{print $2}') in + no) + # for log purposes + echo muted + pactl set-sink-mute @DEFAULT_SINK@ true + ;; + esac + fi + if ! $locked && (( mdiff > 6 *60*60 || mdiff < 12 *60*60 )) && [[ ! -e /tmp/ianknap ]]; then + case $(pactl get-sink-mute @DEFAULT_SINK@ | awk '{print $2}') in + yes) + # for log purposes + echo unmuted + pactl set-sink-mute @DEFAULT_SINK@ false + ;; + esac + fi +} + # use this if we want to do something just once per minute first_chars=() - write-status if [[ $1 ]]; then cat $status_file exit 0 fi +loop_count=0 main-loop() { while true; do power=true @@ -399,15 +486,30 @@ main-loop() { power=false fi wait=15 - if ! $power; then - if systemctl -q is-active bitcoind; then - bitcoinoff + + if $power; then + if (( loop_count % 10 == 0 )); then + if [[ -r /sys/class/power_supply/BAT0/capacity ]]; then + bat=$(cat /sys/class/power_supply/BAT0/capacity) + else + bat=100 + fi + case $bat in + 100|9?) + : + bitcoinon & + ;; + esac fi + else + bitcoinoff wait=60 fi sleep $wait write-status + mute + loop_count=$(( loop_count + 1 )) done }