From: Ian Kelling <ian@iankelling.org>
Date: Sun, 14 Jul 2024 22:41:36 +0000 (-0400)
Subject: hopefully a btrbk fix, misc improvements
X-Git-Url: https://iankelling.org/git/?a=commitdiff_plain;h=368b884d608213573d4e97bb7179b4ff5b60d482;p=distro-setup

hopefully a btrbk fix, misc improvements
---

diff --git a/brc b/brc
index 37733f3..8aded5d 100644
--- a/brc
+++ b/brc
@@ -665,20 +665,26 @@ ccomp time pd
 # The service is unaffected if our ssh connection dies, no need to run
 # in screen or tmux.
 #
-# Note: The last few lines of any existing entries for a unit by that
-# name will be output first, and there will be a few second delay at the
-# start of the command, and a second or so at the end.
+# Note: There will be a few second delay at the start of the command,
+# and a second or so at the end.
 #
 # Note: Functions and aliases obviously won't work, we resolve the
 # command to a file.
 #
-# Note: requires running as root.
+# More comparison to other approaches: systemd-cat fails if you do
+# systemctl daemon-reload, and I've had other problems with it. I haven't
+# yet really tried logging with script, sudo, or screen.
 jdo() {
-  local cmd cmd_name jr_pid ret
+  local cmd cmd_name jr_pid ret sdrun_args sdrun_prefix
   ret=0
   cmd="$1"
   shift
   if [[ $EUID != 0 ]]; then
-    echo "jdo: error: rerun as root"
-    return 1
+    # Not root: start the unit through sudo, but run the command as the calling user.
+    # note, I don't use systemd-run --user because if it does sudo ssh, that
+    # will leave a process around that we can't kill and it will leave
+    # the unit hanging around in a failed state needing manual killing
+    # of the process.
+    sdrun_prefix=sudo
+    sdrun_args="--uid $(id -u) --gid $(id -g) -E SSH_AUTH_SOCK=/run/openssh_agent"
   fi
@@ -686,13 +693,13 @@ jdo() {
   if [[ $cmd != /* ]]; then
     cmd=$(type -P "$cmd")
   fi
+  #note date format for since is date '+%F %T'
   # -q = quiet
-  journalctl -qn2 -f -u "$cmd_name" &
+  journalctl --since=now -qn2 -f -u "$cmd_name" &
   jr_pid=$!
-  # Trial and error of time needed to avoid missing initial lines.
-  # .5 was not reliable. 1 was not reliable. 2 was not reliable
-  sleep 4
-  systemd-run --unit "$cmd_name" --wait --collect "$cmd" "$@" || ret=$?
+  # todo: trial and error testing of the wait time here.
+  sleep 1
+  $sdrun_prefix systemd-run $sdrun_args --unit "$cmd_name" --wait --collect "$cmd" "$@" || ret=$?
   # The sleep lets the journal output its last line
   # before the prompt comes up.
   sleep .5
@@ -723,6 +730,11 @@ tsf() {
 # usage: tsl LOG_PATH_PREFIX COMMAND...
 # example: tsl /root/command
 # log file will be like /root/command-2024-02-10.log
+#
+# Caveats: If there is a user prompt, like "read -p prompt var", it will
+# hang without outputting the prompt. Sometimes some output will get
+# hidden until you hit enter.
+#
 tsl() {
   local log_prefix log_path appending ret
   if (( $# < 2 )); then
@@ -2000,7 +2012,7 @@ jrfg() {
   bn _jrfg "$@"
 }
 jru() {
-  SYSTEMD_COLORS=true bn journalctl -n1000 -f -u "$@" ;
+  journalctl -nall -u "$@" ;
 }
 jrug() {
   _jrug() {
diff --git a/brc2 b/brc2
index c4dfdea..c7de4ab 100644
--- a/brc2
+++ b/brc2
@@ -1658,82 +1658,6 @@ order by timeSent;"
   sqlite3 /p/cheogram/b ".mode tabs" "$q" | sed 's/ /./' | less
 }
 
-# version of jdo for my non-root user
-jdo() {
-  # comparison of alternative logging methods:
-  #
-  # systemd-run command (what this function does)
-  #
-  # If there is a user prompt, the program will detect that it is not
-  # connected to a terminal and act in a non-interactive way, skipping
-  # the prompt. This has the benefit that you know exactly how the
-  # program will act if you want to move it into a service that runs
-  # automatically.
-  #
-  # If run with sudo and command is a shell script which does a sleep,
-  # it can (sometimes?) output some extra whitespace in front of
-  # messages, more for each subsequent message. This can be avoided by
-  # becoming root first.
-  #
-  # It logs the command's pid and exit code, which is nice.
-  #
-  #
-  ### command |& ts | tee file.log
-  #
-  # If there is a user prompt, like "read -p prompt var", it will hang
-  # without outputting the prompt.
-  #
-  # I've had a few times where ts had an error and I wasn't totally sure
-  # if it was really the command or ts having the problem.
-  #
-  # Sometimes some output will get hidden until you hit enter.
-  #
-  #
-  ### command |& pee cat logger
-  #
-  # This seems to work. I need to test more.
-  #
-  #
-  ### command |& logger -s
-  #
-  # User prompts get confusingly prefixed to earlier output, and all log
-  # entries get prefixed with annoying priority level.
-  #
-  #
-  ### systemd-cat
-  #
-  # Had a few problems. One major one is that it exited in the middle of
-  # a command on systemctl daemon-reload
-  #
-  # Related commands which can log a whole session: script, sudo, screen
-  local cmd cmd_name jr_pid ret
-  ret=0
-  cmd="$1"
-  shift
-  cmd_name=${cmd##*/}
-  if [[ $cmd != /* ]]; then
-    cmd=$(type -P "$cmd")
-  fi
-  #note date format for since is date '+%F %T'
-  # -q = quiet
-  journalctl --since=now -qn2 -f -u "$cmd_name" &
-  jr_pid=$!
-  # note, we could have a version that does system --user, but if for example
-  # it does sudo ssh, that will leave a process around that we can't kill
-  # and it will leave the unit hanging around in a failed state needing manual
-  # killing of the process.
-  s systemd-run --uid "$(id -u)" --gid "$(id -g)" \
-    -E SSH_AUTH_SOCK=/run/openssh_agent \
-    --unit "$cmd_name" --wait --collect "$cmd" "$@" || ret=$?
-  # The sleep lets the journal output its last line
-  # before the prompt comes up.
-  sleep .5
-  kill $jr_pid &>/dev/null ||:
-  unset jr_pid
-  fg &>/dev/null ||:
-  # this avoids any err-catch
-  (( ret == 0 )) || return $ret
-}
 
 # service run, and watch the output
 srun() {
diff --git a/btrbk-run b/btrbk-run
index c8aa989..f7a7c4a 100644
--- a/btrbk-run
+++ b/btrbk-run
@@ -30,6 +30,10 @@
 set -e; . /usr/local/lib/bash-bear; set +e
 shopt -s nullglob
 
+pre=btrbk-run
+script_name="${BASH_SOURCE[0]}"
+script_name="${script_name##*/}"
+
 usage() {
   cat <<'EOF'
 btrbk-run [OPTIONS] [run|resume|archive]
@@ -44,16 +48,6 @@ EOF
   exit $1
 }
 
-
-
-pre=btrbk-run
-
-
-
-script_name="${BASH_SOURCE[0]}"
-script_name="${script_name##*/}"
-
-
 log-setup() {
   if [[ ! $log_path ]]; then
     mkdir -p /var/log/btrbk
@@ -165,10 +159,9 @@ qconf() {
       # q has sensitive data i dont want to backup for so long
       cat >>/etc/btrbk$conf_suf.conf <<EOF
 snapshot_preserve $q_preserve
-snapshot_preserve_min 2h
 snapshot_dir btrbk
 target_preserve $q_preserve
-target_preserve_min 2h
+target_preserve_min $preserve_min_this_run
 EOF
       ;;
   esac
@@ -176,6 +169,181 @@ EOF
 }
 
 
+write-config() {
+  # Rewrites /etc/btrbk$conf_suf.conf from the current settings and rebuilds the snapshot-list arrays; calls mexit 0 when conf_only is true.
+  cat >/etc/btrbk$conf_suf.conf <<EOF
+ssh_identity /q/root/h
+# if there is some problem with our ssh filter, we could use this temporarily.
+#ssh_identity /root/.ssh/home
+
+# trying this out
+#stream_compress zstd
+
+# so we only run one at a time
+lockfile                   /var/lock/btrbk$conf_suf.lock
+
+# default format of short does not accomidate hourly preservation setting
+timestamp_format long-iso
+
+# only make a snapshot if things have changed
+snapshot_create onchange
+# I could make this different from target_preserve,
+# if one disk had less space.
+# for now, keeping them equal.
+snapshot_preserve $std_preserve
+snapshot_preserve_min $preserve_min_this_run
+snapshot_dir btrbk
+# so, total backups = ~58
+target_preserve $std_preserve
+target_preserve_min $preserve_min_this_run
+
+# it seems likely that not doing this could result in clone source not found
+# errors. For example when expiry happens differently on different hosts,
+# also, as btrbk does by default, if a failed send happens, on the next run it
+# will warn about a stray subvolume, but then with the default setting,
+# it seems that it could create a backup of a newer subvol
+# and use an older subvol as the parent. That could lead to data being stored
+# in different subvolumes on different machines, thus, clone source error.
+incremental_prefs sao:1
+
+# if something fails and it's not obvious, try doing
+# btrbk -l trace -v dryrun
+
+rate_limit $rate_limit
+EOF
+
+  if $incremental_strict; then
+    cat >>/etc/btrbk$conf_suf.conf <<EOF
+incremental strict
+EOF
+  fi
+
+  # make /q be last
+  mp_count=${#mountpoints[@]}
+  for (( i=0; i < mp_count - 1 ; i++ )); do
+    if [[ ${mountpoints[i]} == /q ]]; then
+      unset "mountpoints[i]"
+      mountpoints+=(/q)
+    fi
+  done
+
+  # Start from empty lists: the loop below appends one list command per
+  # mountpoint, and fills tg_snaps (source mode) or source_snaps (target mode).
+
+  snap_list_cmds=()
+  local_snap_list_cmds=()
+  tg_snaps=()
+  source_snaps=()
+
+  for m in ${mountpoints[@]}; do
+    case $m in
+      /o)
+        vol=/mnt/o
+        ;;
+      *)
+        vol=/mnt/root
+        ;;
+    esac
+
+    sub=${m#/}
+    snap_list_cmds+=("echo $vol/btrbk/$sub.*;")
+    local_snap_list_cmds+=("echo $vol/btrbk/$sub.*")
+
+    if [[ $source ]]; then
+      tmp_a=($vol/btrbk/$sub.*)
+      tg_snaps+=("${tmp_a[*]}")
+      cat >>/etc/btrbk$conf_suf.conf <<EOF
+volume ssh://$bbksource$vol
+subvolume $sub
+EOF
+      qconf
+      cat >>/etc/btrbk$conf_suf.conf <<EOF
+target send-receive $vol/btrbk
+EOF
+    else # we have targets
+      for snap in "$vol/btrbk/$sub."*; do
+        source_snaps[$snap]=t
+      done
+
+      cat >>/etc/btrbk$conf_suf.conf <<EOF
+volume $vol
+subvolume $sub
+EOF
+      qconf
+      for tg in ${targets[@]}; do
+        # handle ipv6
+        if [[ $tg == *:* ]]; then
+          e "note: target is ipv6 address:$tg"
+          tg="[$tg]"
+        fi
+        cat >>/etc/btrbk$conf_suf.conf <<EOF
+target send-receive ssh://$tg$vol/btrbk
+EOF
+      done
+    fi
+  done
+
+  if $conf_only; then
+    mexit 0
+  fi
+
+}
+
+
+get-orphan-tg-snaps() {
+  orphan_tg_snaps=() # gets every tg_snaps path that is not a key of source_snaps
+  local -a mps=("${mountpoints[@]}") # dense copy: write-config's unset of /q can leave index gaps
+  for (( i=0; i < ${#mps[@]}; i++ )); do
+    orphan_start_count=${#orphan_tg_snaps[@]}
+    tg_snap_count=0
+    for tg_snap in ${tg_snaps[$i]}; do
+      tg_snap_count=$(( tg_snap_count + 1 ))
+      if [[ ! ${source_snaps[$tg_snap]} ]]; then
+        orphan_tg_snaps+=("$tg_snap")
+      fi
+    done
+    orphan_mp_count=$(( ${#orphan_tg_snaps[@]} - orphan_start_count ))
+    tmp=$(( tg_snap_count  > 1 && tg_snap_count == orphan_mp_count )) # sanity checking
+    if (( tmp )) ; then
+      die "something went wrong checking orphans on $tg: for mountpoint ${mps[$i]}, $orphan_mp_count"
+    fi
+  done
+}
+
+# Note, this depends on write-config being called first.
+#
+# Delete any subvols on the receiving host that don't exist on the
+# sending host.  Otherwise, the receiving host could have snapshots that
+# aren't on the sending side, and thus become odd leaf subvols, and then
+# btrbk could try to use them when we sync back, creating a weird tree
+# instead of linear parent/child relationship. Maybe this could lead to
+# a missing source subvol error, so let's avoid it.
+del-orphan-snaps() {
+  if [[ $source ]]; then
+    tmpstr=$(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}")
+    IFS=$' \n' read -r -d '' -a source_snap_list <<<"$tmpstr" ||: # one output line per mountpoint
+    for snap in "${source_snap_list[@]}"; do
+      source_snaps[$snap]=t
+    done
+    get-orphan-tg-snaps
+    tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
+    if (( tmp )); then
+      d btrfs sub del ${orphan_tg_snaps[*]}
+    fi
+  else # we have targets
+    for tg in ${targets[@]}; do
+      tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}")
+      mapfile -t tg_snaps <<<"$tmp_str"
+      get-orphan-tg-snaps
+      tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
+      if (( tmp )); then
+        d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}"
+      fi
+    done
+  fi
+}
+
+#### end functions ####
+
 # latest $MAIL_HOST
 if [[ -e /b/bash_unpublished/source-state ]]; then
   source /b/bash_unpublished/source-state
@@ -183,6 +351,7 @@ fi
 
 # note q is owned by root:1000
 
+declare -A source_snaps
 mountpoints=()
 
 rsync_mountpoint=/q
@@ -192,6 +361,8 @@ ret=0
 conf_only=false
 dry_run=false # mostly for testing
 rate_limit=no
+# -q and just using the syslog option seemed nice,
+# but it doesn't show when a send has a parent and when it doesn't.
 verbose=true; verbose_arg="-l trace"
 force=false
 if [[ $PPID == 1 ]]; then
@@ -342,9 +513,14 @@ fi
 
 std_preserve="36h 14d 8w 24m"
 q_preserve="18h 14d 8w"
+preserve_min=6h
 
+prune=false
 case $cmd_arg in
-  run|resume) : ;;
+  run|resume)
+    # see notes at the end for why we set this.
+    prune=true
+    ;;
 
   # This works better than the normal archive command. We have to
   # specify the mount points, but that is what we are used to doing and
@@ -355,9 +531,6 @@ case $cmd_arg in
   # time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk
   archive)
     cmd_arg=resume
-    std_preserve="999h 999d 999w 999m"
-    q_preserve="$std_preserve"
-    preserve_arg=-p
     ;;
   *) die "untested command arg" ;;
 esac
@@ -680,6 +853,8 @@ df --output=size,pcent / | tail -n1"
     # This is a separate ssh because the command can fail and thatis ok.
     if ! $force; then
       locked=false
+      # note: the errors "Invalid MIT-MAGIC-COOKIE-1 key" and "xscreensaver-command: can't open display :0"
+      # are expected if the system is locked by the greeter.
       if lock_info=$($ssh_timeout ssh $h DISPLAY=:0 xscreensaver-command -time); then
         if [[ $lock_info != *non-blanked* ]]; then
           locked=true
@@ -713,177 +888,34 @@ df --output=size,pcent / | tail -n1"
 fi
 
 
-cat >/etc/btrbk$conf_suf.conf <<EOF
-ssh_identity /q/root/h
-#ssh_identity /root/.ssh/home
-
-# trying this out
-#stream_compress zstd
-
-# so we only run one at a time
-lockfile                   /var/lock/btrbk$conf_suf.lock
-
-# default format of short does not accomidate hourly preservation setting
-timestamp_format long-iso
-
-# only make a snapshot if things have changed
-snapshot_create onchange
-# I could make this different from target_preserve,
-# if one disk had less space.
-# for now, keeping them equal.
-snapshot_preserve $std_preserve
-snapshot_preserve_min 6h
-snapshot_dir btrbk
-# so, total backups = ~58
-target_preserve $std_preserve
-target_preserve_min 6h
-
-# it seems very likely that not doing this could result in clone source not found
-# errors, for example when expiry happens differently on different hosts,
-# also, as btrbk does by default, if a failed send happens, on the next run it
-# will warn about a stray subvolume, but then create a backup of a newer subvol
-# and use an older subvol as the parent.
-incremental_prefs sao:1
-
-# if something fails and it's not obvious, try doing
-# btrbk -l trace -v dryrun
-
-rate_limit $rate_limit
-EOF
-
-if $incremental_strict; then
-  cat >>/etc/btrbk$conf_suf.conf <<EOF
-incremental strict
-EOF
-fi
-
-
-# make /q be last
-mp_count=${#mountpoints[@]}
-for (( i=0; i < mp_count - 1 ; i++ )); do
-  if [[ ${mountpoints[i]} == /q ]]; then
-    unset "mountpoints[i]"
-    mountpoints+=(/q)
-  fi
-done
-
-
-
-snap_list_cmds=()
-tg_snaps=()
-declare -A source_snaps
-
-for m in ${mountpoints[@]}; do
-  case $m in
-    /o)
-      vol=/mnt/o
-      ;;
-    *)
-      vol=/mnt/root
-      ;;
-  esac
-
-  sub=${m#/}
-  snap_list_cmds+=("echo $vol/btrbk/$sub.*;")
-
-  if [[ $source ]]; then
-    tmp_a=($vol/btrbk/$sub.*)
-    tg_snaps+=("${tmp_a[*]}")
-    cat >>/etc/btrbk$conf_suf.conf <<EOF
-volume ssh://$bbksource$vol
-subvolume $sub
-EOF
-    qconf
-    cat >>/etc/btrbk$conf_suf.conf <<EOF
-target send-receive $vol/btrbk
-EOF
-  else # we have targets
-    for snap in "$vol/btrbk/$sub."*; do
-      source_snaps[$snap]=t
-    done
-
-    cat >>/etc/btrbk$conf_suf.conf <<EOF
-volume $vol
-subvolume $sub
-EOF
-    qconf
-    for tg in ${targets[@]}; do
-      # handle ipv6
-      if [[ $tg == *:* ]]; then
-        tg="[$tg]"
-      fi
-      cat >>/etc/btrbk$conf_suf.conf <<EOF
-target send-receive ssh://$tg$vol/btrbk
-EOF
-    done
-  fi
-done
-
-# Delete any subvols on the receiving host that don't exist on the
-# sending host.  Otherwise, the receiving host could have snapshots that
-# aren't on the sending side, and thus become odd leaf subvols, and then
-# btrbk could try to use them when we sync back, creating a weird tree
-# instead of linear parent/child relationship. Maybe this could lead to
-# a missing source subvol error, so lets avoid it.
-
-get-orphan-tg-snaps() {
-  orphan_tg_snaps=()
-  for (( i=0; i < ${#mountpoints[@]}; i++ )); do
-    orphan_start_count=${#orphan_tg_snaps[@]}
-    tg_snap_count=0
-    for tg_snap in ${tg_snaps[$i]}; do
-      tg_snap_count=$(( tg_snap_count + 1 ))
-      if [[ ! ${source_snaps[$tg_snap]} ]]; then
-        orphan_tg_snaps+=("$tg_snap")
-      fi
-    done
-    orphan_mp_count=$(( ${#orphan_tg_snaps[@]} - orphan_start_count ))
-    # sanity checking
-    tmp=$(( tg_snap_count  > 1 && tg_snap_count == orphan_mp_count ))
-    if (( tmp )) ; then
-      die "something went wrong checking orphans on $tg: for mountpoint ${mountpoints[$i]}, $orphan_mp_count"
-    fi
-  done
-}
-
-if [[ $source ]]; then
-  for snap in $(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}"); do
-    source_snaps[$snap]=t
-  done
-  get-orphan-tg-snaps
-  tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
-  if (( tmp )); then
-    d btrfs sub del ${orphan_tg_snaps[*]}
-  fi
-else # we have targets
-  for tg in ${targets[@]}; do
-    tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}")
-    mapfile -t tg_snaps <<<"$tmp_str"
-    get-orphan-tg-snaps
-    tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
-    if (( tmp )); then
-      d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}"
-    fi
-  done
-fi
-
 # todo: umount first to ensure we don't have any errors
 # todo: do some kill fuser stuff to make umount more reliable
 
-
-if $conf_only; then
-  mexit 0
+if $prune; then
+  preserve_min_this_run="$preserve_min"
+  write-config
+  # I'd have to do a bit more thinking, but maybe doing this here helps
+  # prune to decide to preserve the same snapshots on different
+  # hosts. Otherwise, this is redundant and not needed.
+  del-orphan-snaps
+  if $dry_run; then
+    m btrbk -c /etc/btrbk$conf_suf.conf -v -n prune
+  else
+    logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg prune
+  fi
 fi
 
 
+preserve_min_this_run=all
+write-config
+del-orphan-snaps
 
 if $dry_run; then
   m btrbk -c /etc/btrbk$conf_suf.conf -v -n $cmd_arg
   mexit 0
+else
+  logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg
 fi
-# -q and just using the syslog option seemed nice,
-# but it doesn't show when a send has a parent and when it doesn't.
-logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg
 
 if $early; then
   exit 0
@@ -921,9 +953,26 @@ fi
 
 
 ## run extra commands on targets
+# Run each list command on its own so there is one output line per mountpoint, like the remote listing.
+local_snaps=$(for c in "${local_snap_list_cmds[@]}"; do $c; done)
 if [[ $ret == 0 ]]; then
   for tg in ${targets[@]}; do
     h=$(ssh $tg hostname)
+    remote_snaps=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}")
+    # a check like this will catch the situation we aim to prevent by running prune
+    if [[ $local_snaps != "$remote_snaps" ]]; then
+      localtmp=$(mktemp)
+      printf "%s\n" "$local_snaps" |tr ' ' '\n' >$localtmp
+      remotetmp=$(mktemp)
+      printf "%s\n" "$remote_snaps" |tr ' ' '\n' >$remotetmp
+      e "error: for $tg, remote and local snaps are different."
+      e "local: $local_snaps"
+      e "tg:$tg = $remote_snaps"
+      e "diff -u local remote"
+      diff -u $localtmp $remotetmp
+      rm $localtmp $remotetmp
+      ret=1
+    fi
     if [[ $h == kd && $HOSTNAME == x3 && $HOSTNAME == "$MAIL_HOST" ]]; then
       d ssh root@$tg 'btrbk-spread-wrap &>/dev/null </dev/null &'
     fi
@@ -964,3 +1013,109 @@ mexit $ret
 # retention, so I'm using long-iso.
 
 # note to create a long-iso timestamp: date +%Y%m%dT%H%M%S%z
+
+
+
+##### background of why we prune then run ####
+
+# example:
+
+# local computer is so, created /etc/btrbk.conf:
+
+# snapshot_create onchange
+# timestamp_format long-iso
+# snapshot_dir btrbk
+# volume /mnt/root/btest
+# subvolume x
+# snapshot_preserve 18h 14d 8w
+# snapshot_preserve_min 2h
+# snapshot_dir btrbk
+# target_preserve 18h 14d 8w
+# target_preserve_min 2h
+# target send-receive ssh://kd.b8.nz/mnt/root/btest/btrbk/
+
+# #### end conf
+
+# First, I created some incremental backups by manually modifying a test
+# file, then running btrbk. Then I renamed the subvols to be over a set
+# of times when daily retention kicked in, but 2 in the same day. I
+# btrbked the files over to another host, then intentionally deleted
+# some so that:
+
+
+# on so:
+
+# x.20240710T141521-0400
+# x.20240711T141719-0400
+# x.20240711T142115-0400
+# x.20240712T142254-0400
+
+
+# on kd:
+
+# x.20240710T141521-0400
+# x.20240711T141719-0400
+
+
+# btrbk -c /etc/btrbk.conf -n -v run
+# btrbk command line client, version 0.32.6  (Sun Jul 14 14:37:56 2024)
+# Using configuration: /etc/btrbk.conf
+# Snapshot creation skipped: snapshot_create=onchange, snapshot is up-to-date: /mnt/root/btest/btrbk/x.20240712T142254-0400
+# Checking for missing backups of subvolume "/mnt/root/btest/x" in "kd.b8.nz:/mnt/root/btest/btrbk/"
+# Creating incremental backup...
+# [send/receive] target: kd.b8.nz:/mnt/root/btest/btrbk/x.20240712T142254-0400
+# [send/receive] source: /mnt/root/btest/btrbk/x.20240712T142254-0400
+# [send/receive] parent: /mnt/root/btest/btrbk/x.20240711T141719-0400
+# [send/receive] (dryrun, skip) checking target metadata: kd.b8.nz:/mnt/root/btest/btrbk/x.20240712T142254-0400
+# Created 1/1 missing backups
+# Cleaning backups of subvolume "/mnt/root/btest/x": kd.b8.nz:/mnt/root/btest/btrbk/x.*
+# Deleted 0 subvolumes in: kd.b8.nz:/mnt/root/btest/btrbk/x.*
+# Cleaning snapshots: /mnt/root/btest/btrbk/x.*
+# [delete] target: /mnt/root/btest/btrbk/x.20240711T142115-0400
+# Deleted 1 subvolumes in: /mnt/root/btest/btrbk/x.*
+# Completed within: 0s  (Sun Jul 14 14:37:56 2024)
+# --------------------------------------------------------------------------------
+# Backup Summary (btrbk command line client, version 0.32.6)
+
+#     Date:   Sun Jul 14 14:37:56 2024
+#     Config: /etc/btrbk.conf
+#     Dryrun: YES
+
+# Legend:
+#     ===  up-to-date subvolume (source snapshot)
+#     +++  created subvolume (source snapshot)
+#     ---  deleted subvolume
+#     ***  received subvolume (non-incremental)
+#     >>>  received subvolume (incremental)
+# --------------------------------------------------------------------------------
+# /mnt/root/btest/x
+# === /mnt/root/btest/btrbk/x.20240712T142254-0400
+# --- /mnt/root/btest/btrbk/x.20240711T142115-0400
+# >>> kd.b8.nz:/mnt/root/btest/btrbk/x.20240712T142254-0400
+
+# NOTE: Dryrun was active, none of the operations above were actually executed!
+
+# ### end output
+
+# What we actually want is:
+
+# [send/receive] target: kd.b8.nz:/mnt/root/btest/btrbk/x.20240711T142115-0400
+# [send/receive] source: /mnt/root/btest/btrbk/x.20240711T142115-0400
+# [send/receive] parent: /mnt/root/btest/btrbk/x.20240711T141719-0400
+
+# Note: I would expect btrbk -p to do this, but it actually works differently than preserve_min all
+
+# This is a good guess as to the source of my periodic clone source error,
+# however, when I hit that error, and then ran btrfs with correctly ordered
+# subvolumes, it did not fix the error. Deleting the parent subvolume on the
+# target host did solve the error. But, it could have been due to this thing
+# happening in an earlier send. I'll give it a year or so of testing.
+#
+# 2024-07-05 23:00:34 ___ [stderr] ERROR: clone: cannot find source subvol 3439b7f8-7130-e740-970c-9c21f5b1110b
+#
+# Note, the error is confusing because there exists a subvol with that
+# uuid, but I'm pretty sure what it is actually saying is that it can't
+# find some data it needs within that subvol when it expected to. At one
+# point, I had figured out a way to see the exact file it was failing
+# on, but last time I looked, I didn't see an easy way to do it. It
+# might be in my post to linux-btrfs about this.
diff --git a/distro-end b/distro-end
index 56fc735..77ff9d8 100755
--- a/distro-end
+++ b/distro-end
@@ -747,7 +747,7 @@ EOF
       # Pin-Priority: 1001
       # EOF
       #
-      # s fwupdmgr get-updates
+      # fwupdmgr get-updates
       # says I have 3 "devices with no available firmware updates"
       # if there were updates, install with: s fwupdmgr update
 
@@ -756,7 +756,10 @@ EOF
         check_downgrade=true
       fi
       # note this installs a kernel
-      pi system76-firmware system76-driver fwupd
+      pi system76-firmware system76-driver
+      pi-nostart fwupd
+      # some other service starts it. note: this still needs testing.
+      ser mask fwupd
       if $check_downgrade && [[ -e /etc/apt/preferences.d/system76-apt-preferences ]]; then
         # driver installs a preferences file to give s76 packages
         # priority so we may need to downgrade here.
diff --git a/filesystem/etc/cron.d/ian b/filesystem/etc/cron.d/ian
index 64e338c..fb67cf9 100644
--- a/filesystem/etc/cron.d/ian
+++ b/filesystem/etc/cron.d/ian
@@ -15,24 +15,24 @@
 # default is /bin/sh
 SHELL=/bin/bash
 # default is /usr/bin:/bin
-PATH=/sbin:/usr/sbin:/usr/bin:/bin:/usr/local/bin:/a/exe
+PATH=/sbin:/usr/sbin:/usr/bin:/bin:/usr/local/bin
 MAILTO=root
 
 */10 * * * *  root rootsshsync |& log-once -15 rootsshsync
 
 # If theres any logged errors we didnt handle in 4 days, maybe we accidentally missed them,
 # so report if we did
-4  9 * * 5   root /a/bin/ds/check-stale-alerts
-4 10 * * 5   root /a/bin/ds/check-radicale
-4 15 * * 5   iank /a/bin/ds/mailclean
-14 * * * *   root /a/bin/ds/bk-backup |& log-once -24 bk-backup
+4  9 * * 5   root check-stale-alerts
+4 10 * * 5   root check-radicale
+4 15 * * 5   iank mailclean
+14 * * * *   root bk-backup |& log-once -24 bk-backup
 0  7 * * *   iank failmail myupgrade-iank
 20 7 * * *   root myupgrade |& log-once -1 myupgrade
 20 5 * * *   root prof-backup |& log-once -1 prof-backup
 19 * * * *   root check-crypttab
-4 20 * * 5   iank /usr/local/bin/check-lets-encrypt-ssl-settings
-4 21 * * 5   iank /b/ds/auto-commit-changes /a /p
-4 23 * * 5   iank failmail /b/ds/eggdrop-upgrade
+4 20 * * 5   iank check-lets-encrypt-ssl-settings
+4 21 * * 5   iank auto-commit-changes /a /p
+4 23 * * 5   iank failmail eggdrop-upgrade
 
 # avoid dnssec expirations. This is a hack, what we should
 # do instead is something like, sign only if expiration is
diff --git a/filesystem/etc/udev/rules.d/99-kaleidoscope.rules b/filesystem/etc/udev/rules.d/99-kaleidoscope.rules
index b238912..5fa3ffc 100644
--- a/filesystem/etc/udev/rules.d/99-kaleidoscope.rules
+++ b/filesystem/etc/udev/rules.d/99-kaleidoscope.rules
@@ -10,8 +10,8 @@
 ##  - https://github.com/systemd/systemd/issues/4288
 ##  - https://www.freedesktop.org/software/systemd/man/sd-login.html
 
-# iank: substituted := for =, based on
-# Jun 09 12:27:48 so systemd-udevd[1385]: /etc/udev/rules.d/99-kaleidoscope.rules:18 ENV key ta
+## iank: substituted := for =, based on
+## Jun 09 12:27:48 so systemd-udevd[1385]: /etc/udev/rules.d/99-kaleidoscope.rules:18 ENV key ta
 kes '==', '!=', '=', or '+=' operator, assuming '='.
 
 SUBSYSTEMS=="usb", ATTRS{idVendor}=="1209", ATTRS{idProduct}=="2300", SYMLINK+="model01", ENV{ID_MM_DEVICE_IGNORE}="1", ENV{ID_MM_CANDIDATE}="0", TAG+="uaccess", TAG+="seat"
diff --git a/filesystem/usr/local/bin/fwupd-cron b/filesystem/usr/local/bin/fwupd-cron
new file mode 100755
index 0000000..cb4d652
--- /dev/null
+++ b/filesystem/usr/local/bin/fwupd-cron
@@ -0,0 +1,21 @@
+#!/bin/bash
+# I, Ian Kelling, follow the GNU license recommendations at
+# https://www.gnu.org/licenses/license-recommendations.en.html. They
+# recommend that small programs, < 300 lines, be licensed under the
+# Apache License 2.0. This file contains or is part of one or more small
+# programs. If a small program grows beyond 300 lines, I plan to change
+# to a recommended GPL license.
+
+# Copyright 2024 Ian Kelling
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/mail-setup b/mail-setup
index dcda9e9..d6aedb2 100755
--- a/mail-setup
+++ b/mail-setup
@@ -620,7 +620,7 @@ fi
 
 u /etc/systemd/system/mailclean.timer <<'EOF'
 [Unit]
-Description=Run mailclean daily
+Description=Run mailclean
 
 [Timer]
 OnCalendar=monthly
@@ -637,7 +637,7 @@ After=multi-user.target
 [Service]
 User=$u
 Type=oneshot
-ExecStart=/usr/local/bin/sysd-mail-once mailclean /a/bin/distro-setup/mailclean
+ExecStart=/usr/local/bin/sysd-mail-once mailclean /usr/local/bin/mailclean
 EOF
 
 # * postgrey
diff --git a/rshiank b/rshiank
new file mode 100755
index 0000000..dd0fea4
--- /dev/null
+++ b/rshiank
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Wrapper for restricted ssh logins: runs $SSH_ORIGINAL_COMMAND only when it
+# matches an entry in the allow lists below, otherwise refuses it with exit 1.
+set -eE -o pipefail
+trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR
+
+# restricted ssh does not allow arguments, but they exist in $SSH_ORIGINAL_COMMAND
+# debug
+# NOTE(review): fixed path in /tmp; consider a log location other users cannot pre-create.
+dfile=/tmp/rshiank
+date >>"$dfile"
+# quoted so a client-supplied glob is logged literally instead of being expanded
+printf 'SSH_ORIGINAL_COMMAND: %s\n' "$SSH_ORIGINAL_COMMAND" >>"$dfile"
+
+if [[ ! $SSH_ORIGINAL_COMMAND ]]; then
+  echo "no SSH_ORIGINAL_COMMAND" >&2
+  exit 1
+fi
+
+# exact-match allow list, one command per line; lines starting with # are skipped
+mapfile -t cmds <<'EOF'
+# allow rsyncing into my ~/tmp
+rsync --server -re.iLsfxCIvu --log-format=X --partial . /home/iank/tmp
+EOF
+# regex allow list; matched with =~, so patterns are unanchored unless they contain ^ and $
+mapfile -t regex_cmds <<'EOF'
+EOF
+
+allow=false
+for c in "${regex_cmds[@]}"; do
+  if [[ $c == \#* ]]; then continue; fi
+  if [[ $SSH_ORIGINAL_COMMAND =~ $c ]]; then
+    allow=true
+    break
+  fi
+done
+if ! $allow; then
+  for c in "${cmds[@]}"; do
+    # echo "c $c" # debug
+    if [[ $c == \#* ]]; then continue; fi
+    if [[ $SSH_ORIGINAL_COMMAND == "$c" ]]; then
+      allow=true
+      break
+    fi
+  done
+fi
+if $allow; then
+  # only reached for allow-listed commands; quoted so eval sees the string unmodified
+  eval "$SSH_ORIGINAL_COMMAND" || exit $?
+else
+  echo "rshiank: failed command: $SSH_ORIGINAL_COMMAND" | tee -a "$dfile" >&2
+  exit 1
+fi
diff --git a/script-files b/script-files
index 4e8c413..79a9098 100644
--- a/script-files
+++ b/script-files
@@ -62,6 +62,12 @@ my_bin_files=(
 
   copyq-restart
   toggle-mute
+
+  check-stale-alerts
+  check-radicale
+  mailclean
+  bk-backup
+  eggdrop-upgrade
 )
 
 for f in /b/log-quiet/*; do
diff --git a/switch-mail-host b/switch-mail-host
index 053134a..d129b62 100644
--- a/switch-mail-host
+++ b/switch-mail-host
@@ -58,11 +58,11 @@ restore_old_btrbk=false
 err-cleanup() {
   if $restore_new_btrbk; then
     e WARNING: due to failure, btrbk.timer may need manual restoration:
-    e $new_shell systemctl start btrbk.timer
+    e $new_shell systemctl enable --now btrbk.timer
   fi
   if $restore_old_btrbk; then
     e WARNING: due to failure, btrbk.timer may need manual restoration:
-    e $old_shell systemctl start btrbk.timer
+    e $old_shell systemctl enable --now btrbk.timer
   fi
 }
 
@@ -260,11 +260,11 @@ fi
 ########### end initial processing, begin actually modifying things ##########
 
 if $new_shell systemctl is-active btrbk.timer; then
-  m $new_shell systemctl stop btrbk.timer
+  m $new_shell systemctl disable --now btrbk.timer
   restore_new_btrbk=true
 fi
 if $old_shell systemctl is-active btrbk.timer; then
-  m $old_shell systemctl stop btrbk.timer
+  m $old_shell systemctl disable --now btrbk.timer
   restore_old_btrbk=true
 fi