set -e; . /usr/local/lib/bash-bear; set +e
shopt -s nullglob
+pre=btrbk-run
+script_name="${BASH_SOURCE[0]}"
+script_name="${script_name##*/}"
+
usage() {
cat <<'EOF'
btrbk-run [OPTIONS] [run|resume|archive]
exit $1
}
-
-
-pre=btrbk-run
-
-
-
-script_name="${BASH_SOURCE[0]}"
-script_name="${script_name##*/}"
-
-
log-setup() {
if [[ ! $log_path ]]; then
mkdir -p /var/log/btrbk
# q has sensitive data i dont want to backup for so long
cat >>/etc/btrbk$conf_suf.conf <<EOF
snapshot_preserve $q_preserve
-snapshot_preserve_min 2h
snapshot_dir btrbk
target_preserve $q_preserve
-target_preserve_min 2h
+target_preserve_min $preserve_min_this_run
EOF
;;
esac
}
+write-config() {
+
+ cat >/etc/btrbk$conf_suf.conf <<EOF
+ssh_identity /q/root/h
+# if there is some problem with our ssh filter, we could use this temporarily.
+#ssh_identity /root/.ssh/home
+
+# trying this out
+#stream_compress zstd
+
+# so we only run one at a time
+lockfile /var/lock/btrbk$conf_suf.lock
+
+# default format of short does not accomidate hourly preservation setting
+timestamp_format long-iso
+
+# only make a snapshot if things have changed
+snapshot_create onchange
+# I could make this different from target_preserve,
+# if one disk had less space.
+# for now, keeping them equal.
+snapshot_preserve $std_preserve
+snapshot_preserve_min $preserve_min_this_run
+snapshot_dir btrbk
+# so, total backups = ~58
+target_preserve $std_preserve
+target_preserve_min $preserve_min_this_run
+
+# it seems likely that not doing this could result in clone source not found
+# errors. For example when expiry happens differently on different hosts,
+# also, as btrbk does by default, if a failed send happens, on the next run it
+# will warn about a stray subvolume, but then with the default setting,
+# it seems that it could create a backup of a newer subvol
+# and use an older subvol as the parent. That could lead to data being stored
+# in different subvolumes on different machines, thus, clone source error.
+incremental_prefs sao:1
+
+# if something fails and it's not obvious, try doing
+# btrbk -l trace -v dryrun
+
+rate_limit $rate_limit
+EOF
+
+ if $incremental_strict; then
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
+incremental strict
+EOF
+ fi
+
+ # make /q be last
+ mp_count=${#mountpoints[@]}
+ for (( i=0; i < mp_count - 1 ; i++ )); do
+ if [[ ${mountpoints[i]} == /q ]]; then
+ unset "mountpoints[i]"
+ mountpoints+=(/q)
+ fi
+ done
+
+
+
+ snap_list_cmds=()
+ local_snap_list_cmds=()
+ tg_snaps=()
+ source_snaps=()
+
+ for m in ${mountpoints[@]}; do
+ case $m in
+ /o)
+ vol=/mnt/o
+ ;;
+ *)
+ vol=/mnt/root
+ ;;
+ esac
+
+ sub=${m#/}
+ snap_list_cmds+=("echo $vol/btrbk/$sub.*;")
+ local_snap_list_cmds+=("echo $vol/btrbk/$sub.*")
+
+ if [[ $source ]]; then
+ tmp_a=($vol/btrbk/$sub.*)
+ tg_snaps+=("${tmp_a[*]}")
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
+volume ssh://$bbksource$vol
+subvolume $sub
+EOF
+ qconf
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
+target send-receive $vol/btrbk
+EOF
+ else # we have targets
+ for snap in "$vol/btrbk/$sub."*; do
+ source_snaps[$snap]=t
+ done
+
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
+volume $vol
+subvolume $sub
+EOF
+ qconf
+ for tg in ${targets[@]}; do
+ # handle ipv6
+ if [[ $tg == *:* ]]; then
+ e "note: target is ipv6 address:$tg"
+ tg="[$tg]"
+ fi
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
+target send-receive ssh://$tg$vol/btrbk
+EOF
+ done
+ fi
+ done
+
+ if $conf_only; then
+ mexit 0
+ fi
+
+}
+
+
+# Fill the global array orphan_tg_snaps with every target snapshot that
+# has no entry in source_snaps.
+#
+# Globals read: mountpoints, tg_snaps (element N holds the whitespace
+#   separated snapshot paths for the Nth mountpoint), source_snaps, tg
+#   (only used in the error message).
+# Globals written: orphan_tg_snaps, plus the scratch variables i, tg_snap,
+#   tg_snap_count, orphan_start_count, orphan_mp_count, tmp.
+# Calls die if a mountpoint has more than one target snapshot and all of
+# them look like orphans.
+get-orphan-tg-snaps() {
+  # tg_snaps is indexed by mountpoint position, but mountpoints can have
+  # holes in its indexes after /q is moved to the end with unset + append.
+  # Use a densely indexed copy so that position i names the right mountpoint.
+  local mp_list=("${mountpoints[@]}")
+  orphan_tg_snaps=()
+  for (( i=0; i < ${#mp_list[@]}; i++ )); do
+    orphan_start_count=${#orphan_tg_snaps[@]}
+    tg_snap_count=0
+    # intentionally unquoted: split the per-mountpoint string into paths
+    for tg_snap in ${tg_snaps[$i]}; do
+      tg_snap_count=$(( tg_snap_count + 1 ))
+      if [[ ! ${source_snaps[$tg_snap]} ]]; then
+        orphan_tg_snaps+=("$tg_snap")
+      fi
+    done
+    orphan_mp_count=$(( ${#orphan_tg_snaps[@]} - orphan_start_count ))
+    # sanity checking
+    tmp=$(( tg_snap_count > 1 && tg_snap_count == orphan_mp_count ))
+    if (( tmp )) ; then
+      die "something went wrong checking orphans on $tg: for mountpoint ${mp_list[$i]}, $orphan_mp_count"
+    fi
+  done
+}
+
+# Note, this depends on write-config being called first.
+#
+# Delete any subvols on the receiving host that don't exist on the
+# sending host. Otherwise, the receiving host could have snapshots that
+# aren't on the sending side, and thus become odd leaf subvols, and then
+# btrbk could try to use them when we sync back, creating a weird tree
+# instead of linear parent/child relationship. Maybe this could lead to
+# a missing source subvol error, so let's avoid it.
+del-orphan-snaps() {
+ if [[ $source ]]; then
+ tmpstr=$(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}")
+ IFS=" " read -r -a source_snap_list <<<"$tmpstr"
+ for snap in "${source_snap_list[@]}"; do
+ source_snaps[$snap]=t
+ done
+ get-orphan-tg-snaps
+ tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
+ if (( tmp )); then
+ d btrfs sub del ${orphan_tg_snaps[*]}
+ fi
+ else # we have targets
+ for tg in ${targets[@]}; do
+ tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}")
+ mapfile -t tg_snaps <<<"$tmp_str"
+ get-orphan-tg-snaps
+ tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
+ if (( tmp )); then
+ d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}"
+ fi
+ done
+ fi
+}
+
+#### end functions ####
+
# latest $MAIL_HOST
if [[ -e /b/bash_unpublished/source-state ]]; then
source /b/bash_unpublished/source-state
# note q is owned by root:1000
+declare -A source_snaps
mountpoints=()
rsync_mountpoint=/q
conf_only=false
dry_run=false # mostly for testing
rate_limit=no
+# -q and just using the syslog option seemed nice,
+# but it doesn't show when a send has a parent and when it doesn't.
verbose=true; verbose_arg="-l trace"
force=false
if [[ $PPID == 1 ]]; then
std_preserve="36h 14d 8w 24m"
q_preserve="18h 14d 8w"
+preserve_min=6h
+prune=false
case $cmd_arg in
- run|resume) : ;;
+ run|resume)
+ # see notes at the end for why we set this.
+ prune=true
+ ;;
# This works better than the normal archive command. We have to
# specify the mount points, but that is what we are used to doing and
# time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk
archive)
cmd_arg=resume
- std_preserve="999h 999d 999w 999m"
- q_preserve="$std_preserve"
- preserve_arg=-p
;;
*) die "untested command arg" ;;
esac
# This is a separate ssh because the command can fail and thatis ok.
if ! $force; then
locked=false
+ # note: the errors "Invalid MIT-MAGIC-COOKIE-1 key" and
+ # "xscreensaver-command: can't open display :0" are expected if
+ # the system is locked by the greeter.
if lock_info=$($ssh_timeout ssh $h DISPLAY=:0 xscreensaver-command -time); then
if [[ $lock_info != *non-blanked* ]]; then
locked=true
fi
-cat >/etc/btrbk$conf_suf.conf <<EOF
-ssh_identity /q/root/h
-#ssh_identity /root/.ssh/home
-
-# trying this out
-#stream_compress zstd
-
-# so we only run one at a time
-lockfile /var/lock/btrbk$conf_suf.lock
-
-# default format of short does not accomidate hourly preservation setting
-timestamp_format long-iso
-
-# only make a snapshot if things have changed
-snapshot_create onchange
-# I could make this different from target_preserve,
-# if one disk had less space.
-# for now, keeping them equal.
-snapshot_preserve $std_preserve
-snapshot_preserve_min 6h
-snapshot_dir btrbk
-# so, total backups = ~58
-target_preserve $std_preserve
-target_preserve_min 6h
-
-# it seems very likely that not doing this could result in clone source not found
-# errors, for example when expiry happens differently on different hosts,
-# also, as btrbk does by default, if a failed send happens, on the next run it
-# will warn about a stray subvolume, but then create a backup of a newer subvol
-# and use an older subvol as the parent.
-incremental_prefs sao:1
-
-# if something fails and it's not obvious, try doing
-# btrbk -l trace -v dryrun
-
-rate_limit $rate_limit
-EOF
-
-if $incremental_strict; then
- cat >>/etc/btrbk$conf_suf.conf <<EOF
-incremental strict
-EOF
-fi
-
-
-# make /q be last
-mp_count=${#mountpoints[@]}
-for (( i=0; i < mp_count - 1 ; i++ )); do
- if [[ ${mountpoints[i]} == /q ]]; then
- unset "mountpoints[i]"
- mountpoints+=(/q)
- fi
-done
-
-
-
-snap_list_cmds=()
-tg_snaps=()
-declare -A source_snaps
-
-for m in ${mountpoints[@]}; do
- case $m in
- /o)
- vol=/mnt/o
- ;;
- *)
- vol=/mnt/root
- ;;
- esac
-
- sub=${m#/}
- snap_list_cmds+=("echo $vol/btrbk/$sub.*;")
-
- if [[ $source ]]; then
- tmp_a=($vol/btrbk/$sub.*)
- tg_snaps+=("${tmp_a[*]}")
- cat >>/etc/btrbk$conf_suf.conf <<EOF
-volume ssh://$bbksource$vol
-subvolume $sub
-EOF
- qconf
- cat >>/etc/btrbk$conf_suf.conf <<EOF
-target send-receive $vol/btrbk
-EOF
- else # we have targets
- for snap in "$vol/btrbk/$sub."*; do
- source_snaps[$snap]=t
- done
-
- cat >>/etc/btrbk$conf_suf.conf <<EOF
-volume $vol
-subvolume $sub
-EOF
- qconf
- for tg in ${targets[@]}; do
- # handle ipv6
- if [[ $tg == *:* ]]; then
- tg="[$tg]"
- fi
- cat >>/etc/btrbk$conf_suf.conf <<EOF
-target send-receive ssh://$tg$vol/btrbk
-EOF
- done
- fi
-done
-
-# Delete any subvols on the receiving host that don't exist on the
-# sending host. Otherwise, the receiving host could have snapshots that
-# aren't on the sending side, and thus become odd leaf subvols, and then
-# btrbk could try to use them when we sync back, creating a weird tree
-# instead of linear parent/child relationship. Maybe this could lead to
-# a missing source subvol error, so lets avoid it.
-
-get-orphan-tg-snaps() {
- orphan_tg_snaps=()
- for (( i=0; i < ${#mountpoints[@]}; i++ )); do
- orphan_start_count=${#orphan_tg_snaps[@]}
- tg_snap_count=0
- for tg_snap in ${tg_snaps[$i]}; do
- tg_snap_count=$(( tg_snap_count + 1 ))
- if [[ ! ${source_snaps[$tg_snap]} ]]; then
- orphan_tg_snaps+=("$tg_snap")
- fi
- done
- orphan_mp_count=$(( ${#orphan_tg_snaps[@]} - orphan_start_count ))
- # sanity checking
- tmp=$(( tg_snap_count > 1 && tg_snap_count == orphan_mp_count ))
- if (( tmp )) ; then
- die "something went wrong checking orphans on $tg: for mountpoint ${mountpoints[$i]}, $orphan_mp_count"
- fi
- done
-}
-
-if [[ $source ]]; then
- for snap in $(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}"); do
- source_snaps[$snap]=t
- done
- get-orphan-tg-snaps
- tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
- if (( tmp )); then
- d btrfs sub del ${orphan_tg_snaps[*]}
- fi
-else # we have targets
- for tg in ${targets[@]}; do
- tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}")
- mapfile -t tg_snaps <<<"$tmp_str"
- get-orphan-tg-snaps
- tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
- if (( tmp )); then
- d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}"
- fi
- done
-fi
-
# todo: umount first to ensure we don't have any errors
# todo: do some kill fuser stuff to make umount more reliable
-
-if $conf_only; then
- mexit 0
+if $prune; then
+ preserve_min_this_run="$preserve_min"
+ write-config
+ # I'd have to do a bit more thinking, but maybe doing this here helps
+ # prune to decide to preserve the same snapshots on different
+ # hosts. Otherwise, this is redundant and not needed.
+ del-orphan-snaps
+ if $dry_run; then
+ m btrbk -c /etc/btrbk$conf_suf.conf -v -n prune
+ else
+ logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg prune
+ fi
fi
+preserve_min_this_run=all
+write-config
+del-orphan-snaps
if $dry_run; then
m btrbk -c /etc/btrbk$conf_suf.conf -v -n $cmd_arg
mexit 0
+else
+ logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg
fi
-# -q and just using the syslog option seemed nice,
-# but it doesn't show when a send has a parent and when it doesn't.
-logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg
if $early; then
exit 0
## run extra commands on targets
+
+local_snaps=$(${local_snap_list_cmds[*]})
if [[ $ret == 0 ]]; then
for tg in ${targets[@]}; do
h=$(ssh $tg hostname)
+ remote_snaps=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}")
+ # a check like this will catch the situation we aim to prevent by running prune
+ if [[ $local_snaps != "$remote_snaps" ]]; then
+ localtmp=$(mktemp)
+ printf "%s\n" "$local_snaps" |tr ' ' '\n' >$localtmp
+ remotetmp=$(mktemp)
+ printf "%s\n" "$remote_snaps" |tr ' ' '\n' >$remotetmp
+ e "error: for $tg, remote and local snaps are different."
+ e "local: $local_snaps"
+ e "tg:$tg = $remote_snaps"
+ e "diff -u local remote"
+ diff -u $localtmp $remotetmp
+ rm $localtmp $remotetmp
+ ret=1
+ fi
if [[ $h == kd && $HOSTNAME == x3 && $HOSTNAME == "$MAIL_HOST" ]]; then
d ssh root@$tg 'btrbk-spread-wrap &>/dev/null </dev/null &'
fi
# retention, so I'm using long-iso.
# note to create a long-iso timestamp: date +%Y%m%dT%H%M%S%z
+
+
+
+##### background of why we prune then run ####
+
+# example:
+
+# The local computer is the host named "so". On it, I created /etc/btrbk.conf:
+
+# snapshot_create onchange
+# timestamp_format long-iso
+# snapshot_dir btrbk
+# volume /mnt/root/btest
+# subvolume x
+# snapshot_preserve 18h 14d 8w
+# snapshot_preserve_min 2h
+# snapshot_dir btrbk
+# target_preserve 18h 14d 8w
+# target_preserve_min 2h
+# target send-receive ssh://kd.b8.nz/mnt/root/btest/btrbk/
+
+# #### end conf
+
+# First, I created some incremental backups by manually modifying a test
+# file, then running btrbk. Then I renamed the subvols to be over a set
+# of times when daily retention kicked in, but 2 in the same day. I
+# btrbked the files over to another host, then intentionally deleted
+# some so that:
+
+
+# on so:
+
+# x.20240710T141521-0400
+# x.20240711T141719-0400
+# x.20240711T142115-0400
+# x.20240712T142254-0400
+
+
+# on kd:
+
+# x.20240710T141521-0400
+# x.20240711T141719-0400
+
+
+# btrbk -c /etc/btrbk.conf -n -v run
+# btrbk command line client, version 0.32.6 (Sun Jul 14 14:37:56 2024)
+# Using configuration: /etc/btrbk.conf
+# Snapshot creation skipped: snapshot_create=onchange, snapshot is up-to-date: /mnt/root/btest/btrbk/x.20240712T142254-0400
+# Checking for missing backups of subvolume "/mnt/root/btest/x" in "kd.b8.nz:/mnt/root/btest/btrbk/"
+# Creating incremental backup...
+# [send/receive] target: kd.b8.nz:/mnt/root/btest/btrbk/x.20240712T142254-0400
+# [send/receive] source: /mnt/root/btest/btrbk/x.20240712T142254-0400
+# [send/receive] parent: /mnt/root/btest/btrbk/x.20240711T141719-0400
+# [send/receive] (dryrun, skip) checking target metadata: kd.b8.nz:/mnt/root/btest/btrbk/x.20240712T142254-0400
+# Created 1/1 missing backups
+# Cleaning backups of subvolume "/mnt/root/btest/x": kd.b8.nz:/mnt/root/btest/btrbk/x.*
+# Deleted 0 subvolumes in: kd.b8.nz:/mnt/root/btest/btrbk/x.*
+# Cleaning snapshots: /mnt/root/btest/btrbk/x.*
+# [delete] target: /mnt/root/btest/btrbk/x.20240711T142115-0400
+# Deleted 1 subvolumes in: /mnt/root/btest/btrbk/x.*
+# Completed within: 0s (Sun Jul 14 14:37:56 2024)
+# --------------------------------------------------------------------------------
+# Backup Summary (btrbk command line client, version 0.32.6)
+
+# Date: Sun Jul 14 14:37:56 2024
+# Config: /etc/btrbk.conf
+# Dryrun: YES
+
+# Legend:
+# === up-to-date subvolume (source snapshot)
+# +++ created subvolume (source snapshot)
+# --- deleted subvolume
+# *** received subvolume (non-incremental)
+# >>> received subvolume (incremental)
+# --------------------------------------------------------------------------------
+# /mnt/root/btest/x
+# === /mnt/root/btest/btrbk/x.20240712T142254-0400
+# --- /mnt/root/btest/btrbk/x.20240711T142115-0400
+# >>> kd.b8.nz:/mnt/root/btest/btrbk/x.20240712T142254-0400
+
+# NOTE: Dryrun was active, none of the operations above were actually executed!
+
+# ### end output
+
+# What we actually want is:
+
+# [send/receive] target: kd.b8.nz:/mnt/root/btest/btrbk/x.20240711T142115-0400
+# [send/receive] source: /mnt/root/btest/btrbk/x.20240711T142115-0400
+# [send/receive] parent: /mnt/root/btest/btrbk/x.20240711T141719-0400
+
+# Note: I would expect btrbk -p to do this, but it actually works differently than preserve_min all
+
+# This is a good guess as to the source of my periodic clone source error,
+# however, when I hit that error, and then ran btrfs with correctly ordered
+# subvolumes, it did not fix the error. Deleting the parent subvolume on the
+# target host did solve the error. But, it could have been due to this thing
+# happening in an earlier send. I'll give it a year or so of testing.
+#
+# 2024-07-05 23:00:34 ___ [stderr] ERROR: clone: cannot find source subvol 3439b7f8-7130-e740-970c-9c21f5b1110b
+#
+# Note, the error is confusing because there exists a subvol with that
+# uuid, but I'm pretty sure what it is actually saying is that it can't
+# find some data it needs within that subvol when it expected to. At one
+# point, I had figured out a way to see the exact file it was failing
+# on, but last time I looked, I didn't see an easy way to do it. It
+# might be in my post to linux-btrfs about this.