From c5021d1e8ad29f946b28d7a22d959e691e28bf32 Mon Sep 17 00:00:00 2001 From: Ian Kelling Date: Sat, 22 Jul 2023 00:32:43 -0400 Subject: [PATCH] fixes and qd for better source subvol error repo --- brc2 | 38 +++++++- btrbk-run | 92 +++++++------------ distro-end | 16 +--- filesystem/etc/prometheus/rules/iank.yml | 2 +- .../etc/systemd/system/btrbk-spread.service | 2 +- .../etc/systemd/system/btrbk-spread.timer | 8 -- mount-latest-subvol | 5 +- subdir_files/.config/amtool/config.yml | 2 - switch-mail-host | 17 ++-- 9 files changed, 86 insertions(+), 96 deletions(-) delete mode 100644 machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.timer delete mode 100644 subdir_files/.config/amtool/config.yml diff --git a/brc2 b/brc2 index 86512de..0432e3a 100644 --- a/brc2 +++ b/brc2 @@ -2534,6 +2534,10 @@ mnsnonet() { lom() { # l = the loopback device local l base + # get sudo pass cached right away + if ! sudo -nv 2>/dev/null; then + sudo -v + fi if [[ $1 == /* ]]; then base=${1##*/} fs_file=$1 @@ -3635,7 +3639,7 @@ rem() { find $paths -not \( -name .svn -prune -o -name .git -prune \ -o -name .hg -prune -o -name .editor-backups -prune \ -o -name .undo-tree-history -prune \) 2>/dev/null | grep -iP --color=auto "$*" ||: - rgv -m 5 "$*" $paths ||: + rgv -m 5 "$*" $paths /a/t.org /p/w.org /a/work.org ||: } # setup: @@ -3801,10 +3805,40 @@ fi # rg with respecting vcs ignore files rgv() { ret=0 - command rg -. -z --no-messages -L -i -M 900 -g '!auto-save-list' -g '!.savehist' "$@" || ret=$? + # -. = search dotfiles + # -z = search zipped files + # -i = case insensitive + # -M = max columns + # --no-messages because of annoying errors on broken symlinks + command rg -. -z --no-messages -i -M 900 -g '!.git' -g '!auto-save-list' -g '!.savehist' "$@" || ret=$? 
return $ret } +amall() { + printf "$(tput setaf 5 2>/dev/null ||:)█ coresite █$(tput sgr0 2>/dev/null||:)" + amfsf "$@" + printf "$(tput setaf 5 2>/dev/null ||:)█ office █$(tput sgr0 2>/dev/null||:)" + amoffice "$@" +} +amallq() { # amall quiet + amfsf "$@" + amoffice "$@" +} +amfsf() { + sedi -r '/alertmanager.url/s/@office//' ~/.config/amtool/config.yml + amtool "$@" +} +amoffice() { + sedi -r '/alertmanager.url/s/@fsf/@office.fsf/' ~/.config/amtool/config.yml + amtool "$@" +} +amls() { + amall silence query "$@" +} +amrmall() { + amfsf silence expire $(amfsf silence query -q) + amoffice silence expire $(amoffice silence query -q) +} # taken from default changes to bashrc and bash_profile path-add --end --ifexists $HOME/.rvm/bin diff --git a/btrbk-run b/btrbk-run index 057ff79..f861880 100644 --- a/btrbk-run +++ b/btrbk-run @@ -90,7 +90,7 @@ targets=() early=false cron=false fast=false -kd_spread_maybe=false +kd_spread=false orig_args=("$@") temp=$(getopt -l cron,fast,pull-reexec,help 23cefikl:m:npqrs:t:vh "$@") || usage 1 eval set -- "$temp" @@ -116,15 +116,15 @@ while true; do # switch mail-host, no need to repeat the same checks again. --fast) fast=true ;; -i) incremental_strict=true ;; - # note this implies resume - -k) kd_spread_maybe=true ;; + # note this implies resume, cron and -p + -k) kd_spread=true ;; # bytes per second, suffix k m g -l) rate_limit=$2; shift ;; # Comma separated mountpoints to backup. This has defaults set below. -m) IFS=, mountpoints=($2); unset IFS; shift ;; -n) dry_run=true ;; - # hide progress - -p) progress_arg= ;; + # preserve existing snapshots and backups + -p) preserve_arg=-p ;; # internal option for rerunning under newer SOURCE_HOST version. 
--pull-reexec) pull_reexec=true;; # quiet @@ -152,11 +152,16 @@ done cmd_arg="$1" -if $kd_spread_maybe; then +if $kd_spread; then if [[ $cmd_arg && $cmd_arg != resume ]]; then die "dont pass -k without resume or empty run arg" fi + if [[ $HOSTNAME == "$MAIL_HOST" ]]; then + die "something went wrong, -k not meant to be run on MAIL_HOST" + fi cmd_arg=resume + preserve_arg=-p + cron=true fi if [[ ! $cmd_arg ]]; then @@ -214,41 +219,11 @@ fi # targets, plus any given on the command line. - -kd_spread=false -if ! $cron && $kd_spread_maybe; then - kd_spread=true -fi # set default targets if [[ ! -v targets && ! $source ]]; then - if $cron; then - if [[ $HOSTNAME != "$MAIL_HOST" ]]; then - if $kd_spread_maybe && [[ $HOSTNAME == kd && $MAIL_HOST == x3 ]]; then - if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then - work_host=x3.office.fsf.org - elif ping -q -c1 -w1 x3wg.b8.nz &>/dev/null; then - work_host=x3wg.b8.nz - fi - if [[ $work_host ]]; then - source_state="$(ssh $work_host cat /a/bin/bash_unpublished/source-state)" - eval "$source_state" - if [[ $MAIL_HOST == x3 ]]; then - kd_spread=true - else - # x3 was the mail host, but it moved to some other machine - # without updating us yet. - echo "MAIL_HOST=$MAIL_HOST, nothing to do" - mexit 0 - fi - else - echo "MAIL_HOST=$MAIL_HOST, nothing to do" - mexit 0 - fi - else - echo "MAIL_HOST=$MAIL_HOST, nothing to do" - mexit 0 - fi - fi + if $cron && ! $kd_spread && [[ $HOSTNAME != "$MAIL_HOST" ]]; then + echo "MAIL_HOST=$MAIL_HOST, nothing to do" + mexit 0 fi at_work=false @@ -290,22 +265,20 @@ if [[ ! -v targets && ! 
$source ]]; then targets+=(x3wg.b8.nz) fi fi - # temporarily disabled while doing recovery - # for h in frodo kd; do - for h in kd; do - if [[ $HOSTNAME == "$h" ]]; then - continue - fi - targets+=($h.b8.nz) - done - for h in x2 sy; do - if [[ $HOSTNAME == "$h" ]]; then - continue - fi - if ping -q -c1 -w1 $h.b8.nz &>/dev/null; then - targets+=($h.b8.nz) - elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then - targets+=(${h}w.b8.nz) + if [[ $HOSTNAME != kd ]]; then + targets+=(kd.b8.nz) + fi + wireless_home_hosts=( + x2 + sy + ) + for h in ${wireless_home_hosts[@]}; do + if [[ $HOSTNAME != "$h" ]]; then + if ping -q -c1 -w1 $h.b8.nz &>/dev/null; then + targets+=($h.b8.nz) + elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then + targets+=(${h}w.b8.nz) + fi fi done elif $at_work; then @@ -347,17 +320,17 @@ else prospective_mps+=(/o) fi if [[ $source_host == "$HOST2" ]]; then - prospective_mps+=(/a /ar /qr /q) + prospective_mps+=(/a /ar /qr /qd /q) fi else if [[ $HOSTNAME == "$MAIL_HOST" ]]; then prospective_mps+=(/o) fi if [[ $HOSTNAME == "$HOST2" ]]; then - prospective_mps+=(/a /ar /qr /q) + prospective_mps+=(/a /ar /qr /qd /q) fi if $kd_spread; then - prospective_mps=(/a /ar /o /qr /q) + prospective_mps=(/a /ar /o /qr /qd /q) fi fi # note: put q last just in case its specific retention options were to @@ -724,6 +697,9 @@ fi if [[ $ret == 0 ]]; then for tg in ${targets[@]}; do h=$(ssh $tg hostname) + if [[ $h == kd && $HOSTNAME == x3 && $HOSTNAME == "$MAIL_HOST" ]]; then + ssh root@$tg systemctl --no-block start btrbk-spread + fi rsync -a -f"- */" -f"+ *" /var/log/btrbk/ root@$tg:/var/log/btrbk/$tg ssh root@$tg /usr/local/bin/mail-backup-clean done diff --git a/distro-end b/distro-end index 855f937..02b5937 100755 --- a/distro-end +++ b/distro-end @@ -619,7 +619,7 @@ case $HOSTNAME in dnsb8 fi - s /c/roles/prom_export/files/simple/usr/local/bin/fsf-install-node-exporter -l 127.0.0.1:9100 + s /c/roles/prom_export/files/simple/usr/local/bin/fsf-install-node-exporter -l 
127.0.0.1 # ex for exporter web-conf -p 9101 -f 9100 - apache2 ${HOSTNAME}ex.b8.nz <<'EOF' @@ -1950,23 +1950,13 @@ sgo dynamicipupdate if grep -xFq $HOSTNAME /a/bin/ds/machine_specific/btrbk.hosts; then sgo btrbk.timer fi -if [[ $HOSTNAME == kd ]]; then - sgo btrbk-spread.timer -fi -# note: to see when it was last run, +# note: to see when a timer was last run, # ser list-timers -case $HOSTNAME in - kd) - sgo btrbkrust.timer - ;; -esac ### begin prometheus ### - - case $HOSTNAME in kd) # Font awesome is needed for the alertmanager ui. @@ -2007,7 +1997,7 @@ EOF ser restart prometheus-alertmanager fi - s /c/roles/prom_export/files/simple/usr/local/bin/fsf-install-node-exporter -l 127.0.0.1:9100 + s /c/roles/prom_export/files/simple/usr/local/bin/fsf-install-node-exporter -l 127.0.0.1 for ser in prometheus-node-exporter prometheus-alertmanager prometheus; do sysd-prom-fail-install $ser diff --git a/filesystem/etc/prometheus/rules/iank.yml b/filesystem/etc/prometheus/rules/iank.yml index 0049743..47012cc 100644 --- a/filesystem/etc/prometheus/rules/iank.yml +++ b/filesystem/etc/prometheus/rules/iank.yml @@ -199,7 +199,7 @@ groups: # 19 for 19 minutes, but I make it 18 just to give a bit of slack. 
- alert: historical_missing_metric expr: |- - count_over_time(up{job="prometheus"}[19m]) <= 18 unless on() present_over_time(ALERTS[19m]) unless on() time() - node_boot_time_seconds{instance="kdwg:9101"} <= 60 * 17 + count_over_time(up{job="prometheus"}[19m]) <= 18 unless on() present_over_time(ALERTS[19m]) unless on() time() - node_boot_time_seconds{instance="kd"} <= 60 * 17 labels: severity: warn diff --git a/machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.service b/machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.service index 44e6ceb..b1b3f96 100644 --- a/machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.service +++ b/machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.service @@ -5,6 +5,6 @@ After=multi-user.target [Service] Type=oneshot ExecStartPre=/a/exe/install-my-scripts -ExecStart=/usr/local/bin/sysd-mail-once -t daylert@iankelling.org btrbk /usr/local/bin/btrbk-run --cron -k +ExecStart=/usr/local/bin/sysd-mail-once -t daylert@iankelling.org btrbk /usr/local/bin/btrbk-run -k ExecStartPost=/bin/sleep 1 ExecStartPost=/a/exe/install-my-scripts diff --git a/machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.timer b/machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.timer deleted file mode 100644 index 875d6ce..0000000 --- a/machine_specific/kd/filesystem/etc/systemd/system/btrbk-spread.timer +++ /dev/null @@ -1,8 +0,0 @@ -[Unit] -Description=Run btrbk-run hourly - -[Timer] -OnCalendar=*-*-* *:45:00 - -[Install] -WantedBy=timers.target diff --git a/mount-latest-subvol b/mount-latest-subvol index 099c38f..bfe99f9 100644 --- a/mount-latest-subvol +++ b/mount-latest-subvol @@ -171,7 +171,7 @@ done if (( $# )); then all_vols=( "$@" ) else - all_vols=(q a o i ar qr) + all_vols=(q a o i ar qd qr) fi ##### end command line parsing ######## @@ -242,6 +242,7 @@ fa=(/mnt/root/btrbk/q.*); f=${fa[0]} if [[ -e $f ]]; then fstab <