From 047f5354e9f0dd630a7d0a8122b86df386fff489 Mon Sep 17 00:00:00 2001
From: Ian Kelling
Date: Sat, 14 Feb 2026 07:04:54 -0500
Subject: [PATCH] investigate prof-tail failures

---
Review notes (commentary only; ignored when the patch is applied):
  * prof-tail's loop timer now reads EPOCHSECONDS, the bash variable the
    same loop already uses for secs_till_midnight. The previous spelling,
    EPOCH_SECONDS, is an unset name: loop_start became 0 and the
    "loop finished within a second" test was always true, so fail_loops
    was incremented on every iteration and never reset to 0.
  * Whitespace-only context lines and indentation were reconstructed from
    the hunk line counts; apply with whitespace tolerance if needed.

 filesystem/usr/local/bin/prof        |  4 +--
 filesystem/usr/local/bin/prof-remote |  2 +-
 filesystem/usr/local/bin/prof-tail   | 37 ++++++++++++++++++++++++----
 system-status                        |  3 +++
 4 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/filesystem/usr/local/bin/prof b/filesystem/usr/local/bin/prof
index b7ddf7f..601942f 100755
--- a/filesystem/usr/local/bin/prof
+++ b/filesystem/usr/local/bin/prof
@@ -41,8 +41,6 @@ if $dossh; then
   export IANK_BASHRC_RUN="prof-remote $remote"
   konsole --profile profanity
 else
-  prof-tail &
+  prof-tail |& ts "%F %T" | tee -a /home/iank/.local/prof-tail.log &
   konsole --profile profanity -e tmux -L profanity a
 fi
-
-
diff --git a/filesystem/usr/local/bin/prof-remote b/filesystem/usr/local/bin/prof-remote
index a413d56..71cf344 100755
--- a/filesystem/usr/local/bin/prof-remote
+++ b/filesystem/usr/local/bin/prof-remote
@@ -33,7 +33,7 @@ while true; do
   fi
   # -n or else it competes with the other ssh for reading stdin.
   # -l iank is just a safety measure against misconfig/temporary config in .ssh/config.
-  ssh -l iank -n $remote -- prof-tail -r | prof-notify &
+  ssh -l iank -n $remote -- prof-tail -r |& prof-notify &
   ssh -l iank -t $remote tmux -L profanity a ||:
   builtin kill %% &> /dev/null ||:
   if (( EPOCHSECONDS > start + 600 )); then
diff --git a/filesystem/usr/local/bin/prof-tail b/filesystem/usr/local/bin/prof-tail
index 0bf0dd5..061f542 100755
--- a/filesystem/usr/local/bin/prof-tail
+++ b/filesystem/usr/local/bin/prof-tail
@@ -21,6 +21,8 @@
 # limitations under the License.
 
 set -e; . /usr/local/lib/bash-bear; set +e
+# We infinite retry, not exit on error.
+trap err-info ERR
 
 remote=false
 if [[ $1 == -r ]]; then
@@ -34,9 +36,20 @@ if [[ $1 == -d ]]; then
   shift
 fi
 
+## flag to do manual testing during development
+dev=false
+tailn=0
+if [[ $1 == -e ]]; then
+  dev_date=(-d '-1 day')
+  tailn=100000
+  debug=true
+  dev=true
+  shift
+fi
 
-pkill -fA '^/bin/bash /usr/local/bin/prof-tail' ||:
-
+if ! $dev; then
+  pkill -fA '^/bin/bash /usr/local/bin/prof-tail' ||:
+fi
 
 # kill 0 doesn't seem to be documented in man bash, but it kills the
 # background processes.
@@ -52,17 +65,19 @@ pkill -fA '^/bin/bash /usr/local/bin/prof-tail' ||:
 # ruben
 
 xmpp_users=(
+  jtuttle
   andrew
   craigt
   dawnbp
   ekokao
   gregf
+  heshan
   johnh
   jrasata
   ksiewicz
   michael
+  sean
   zoe
-  jtuttle
 )
 
 tail-cmd() {
@@ -77,7 +92,7 @@ tail-cmd() {
 
   # note: man timeout says 124 is when timeout times out.
   # for debugging, add test2 to the room list and use jtuttle with pidgin.
-  timeout $secs_till_midnight tail -n0 -qF \
+  timeout $secs_till_midnight tail -n$tailn -qF \
     $logdir/rooms/{sys-private,office,operations}_at_conference.fsf.org/$log_today ${user_logs[@]} 2>/dev/null \
     | sed -urn "/$pre iank: /d;s/$pre ([^@ ]+: .*\biank\b.*)/\1/p;s/$pre ([^@:]+)@.*/pm: \1/p" || [[ ${PIPESTATUS[0]} == 124 ]]
 
@@ -97,10 +112,12 @@ notify-cmd() {
 
 pre="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}-[0-9]{2} -"
 
+declare -i fail_loops=0 loop_start
 while true; do
+  loop_start=$EPOCHSECONDS
   midnight=$(date -d '00:00:00 tomorrow' +%s)
   secs_till_midnight=$(( midnight - EPOCHSECONDS ))
-  log_today=$(date '+%Y_%m_%d').log
+  log_today=$(date '+%Y_%m_%d' "${dev_date[@]}").log
   logdir=/home/iank/.local/share/profanity/chatlogs/iank_at_fsf.org
   user_logs=()
   for u in ${xmpp_users[@]}; do
@@ -113,5 +130,15 @@ while true; do
     tail-cmd | while :; do read -r l; notify-cmd "$l"; done
   fi
 
+  if (( loop_start >= EPOCHSECONDS - 1 )); then
+    fail_loops+=1
+  else
+    fail_loops=0
+  fi
+  if (( fail_loops >= 60 * 60 * 3 )); then
+    echo "prof-tail fail_loops: $fail_loops" >&2
+    exit 1
+  fi
+
   sleep 2
 done
diff --git a/system-status b/system-status
index eebb078..ce733ab 100755
--- a/system-status
+++ b/system-status
@@ -139,6 +139,9 @@ write-status() {
     chars+=(EXIM)
   fi
 
+  if ! pgrep -fc '^/bin/bash /usr/local/bin/prof-tail$' &>/dev/null; then
+    chars+=(PROF)
+  fi
 
   if [[ -e /a/bin/bash_unpublished/source-state ]]; then
     # /a gets remounted due to btrbk, ignore error code for file doesnt exist
-- 
2.30.2