#!/bin/bash
# Configure & run btrbk & related work on Ian's computers.
# Copyright (C) 2024 Ian Kelling
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# SPDX-License-Identifier: GPL-3.0-or-later
# todo: if we cancel in the middle of a btrfs send, then run again
# immediately, the received subvolume doesn't get a Received UUID:
# field, and we won't mount it. Need to figure out a solution that will
# fix this.
# Re-exec this script through sudo when not already running as root;
# -E asks sudo to keep the caller's environment.
[[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"
# Source the bash-bear library with errexit enabled so that a failure to
# load it aborts the script; errexit is switched off again afterwards.
# NOTE(review): what bash-bear installs (traps, shell options) is not
# visible from here.
set -e; . /usr/local/lib/bash-bear; set +e
# Globs that match nothing expand to nothing rather than to themselves.
shopt -s nullglob
# Print the short help text, then the leading part of this script file
# (everything up to the marker comment that closes the argument handling
# section), and exit.
# Arguments: $1 - exit status to exit with
usage() {
  printf '%s\n' \
    'btrbk-run [OPTIONS] [run|resume|archive]' \
    'usually -t TARGET_HOST or -s SOURCE_HOST' \
    'Note, at source location, intentionally not executable, run and read' \
    'install-my-scripts.'
  echo "top of script file:"
  # p;b prints each line inside the range; the first line past it hits q.
  sed -n '1,/^[# ]*end command line/{p;b};q' "$0"
  exit $1
}
# Prefix put in front of every message this script prints.
pre=btrbk-run
# File name of the running script with any leading directories removed.
script_name="${BASH_SOURCE[0]##*/}"
# Pick a timestamped log file under /var/log/btrbk, creating that
# directory first. Does nothing when log_path already has a value.
# Globals: log_path (read, written)
log-setup() {
  if [[ $log_path ]]; then
    return 0
  fi
  mkdir -p /var/log/btrbk
  log_path=/var/log/btrbk/$(date +%F_%T%:::z).log
}
# Run a command with its output shown and appended (timestamped) to the
# log file. In dry-run or conf-only mode the command is only printed.
# Globals:   dry_run, conf_only, pre, log_path (read)
# Arguments: the command and its arguments
d() {
  if $dry_run || $conf_only; then
    # $pre is passed as data, never as part of the printf format.
    printf '%s dry-run: %s\n' "$pre" "$*"
  else
    log-setup
    # pee feeds its input to each command: cat shows it, ts timestamps it
    # into the log.
    printf '%s running: %s\n' "$pre" "$*" |& pee cat 'ts "%F %T" >>'"$log_path"
    "$@" |& pee cat 'ts "%F %T" >>'"$log_path"
  fi
}
# Run a command, first printing it when verbose is on. The command's
# exit status is returned.
# Globals:   verbose, pre (read)
# Arguments: the command and its arguments
m() {
  if $verbose; then
    # $pre is passed as data, never as part of the printf format.
    printf '%s %s\n' "$pre" "$*"
  fi
  "$@"
}
# Print a message to stdout, prefixed with $pre.
# Arguments: words of the message, joined with spaces
e() {
  # $pre is passed as data, never as part of the printf format.
  printf '%s %s\n' "$pre" "$*"
}
# Run a command with its output going only to the (timestamped) log
# file. If a non-zero status is captured, show the end of the log and
# exit the script with that status.
# Globals:   pre, log_path (read)
# Arguments: the command and its arguments
logq() {
  local exit_code=0
  log-setup
  # $pre is passed as data, never as part of the printf format.
  printf '%s running: %s\n' "$pre" "$*" | pee cat 'ts "%F %T" >>'"$log_path"
  e "logging to $log_path"
  # NOTE(review): the status captured here is that of the pipeline; it
  # reflects the command itself only if pipefail is in effect. Confirm
  # that bash-bear enables it.
  "$@" |& ts "%F %T" >>"$log_path" || exit_code=$?
  printf '%s exit code:%s of %s\n' "$pre" "$exit_code" "$*" | pee cat 'ts "%F %T" >>'"$log_path"
  if (( exit_code > 0 )); then
    e "error: command exit code: $exit_code. exiting after tail -n50 $log_path"
    tail -n50 "$log_path"
    exit $exit_code
  fi
}
# Print an error message to stderr and exit the script with status 1.
# Arguments: words of the message, joined with spaces
die() {
  # $pre is passed as data, never as part of the printf format.
  printf '%s error: %s\n' "$pre" "$*" >&2
  printf '%s\n' "$pre exiting with status 1" >&2
  exit 1
}
# Announce the exit status on stdout, then exit with it.
# Arguments: $1 - exit status
mexit() {
  local status=$1
  printf '%s\n' "$pre exiting with status $status"
  exit $status
}
# Abort via die, reporting that an installed file differs from its
# source copy.
# Arguments: $1 - name of the file that is out of date
uninstalled-file-die() {
  local stale_file=$1
  die "file $stale_file is not latest. run install-my-scripts or rerun with -f"
}
# Work out whether this machine is at home or at work and set the
# matching flag to true. Fixed machines are decided by host name alone;
# the portable ones are decided by probing the network they are on.
# Neither flag is touched when nothing matches.
# Globals: HOSTNAME (read), at_home, at_work (written)
set-location() {
  local reverse_name
  if [[ $HOSTNAME == kw ]]; then
    at_work=true
  elif [[ $HOSTNAME == kd || $HOSTNAME == frodo ]]; then
    at_home=true
  elif [[ $HOSTNAME == x2 || $HOSTNAME == x3 || $HOSTNAME == sy || $HOSTNAME == so ]]; then
    # A failing or timed-out lookup simply yields a non-matching string.
    reverse_name=$(timeout 1 dig +short @10.2.0.1 -x 10.2.0.2 2>&1 ||:)
    if [[ $reverse_name == kd.b8.nz. ]] \
         && ip n show 10.2.0.1 | grep . &>/dev/null; then
      # note: logic duplicated in 11-iank
      at_home=true
    elif ping -q -c1 -w1 hal.office.fsf.org &>/dev/null \
           && ip n show 192.168.0.26 | grep . &>/dev/null; then
      at_work=true
    fi
  fi
}
# Stop the script when there are no default targets to use:
# - exits 0 (via mexit) when this host is not MAIL_HOST, unless force
#   is on;
# - dies when this host is not one of the hosts with known defaults.
# Globals: force, HOSTNAME, MAIL_HOST (read)
exit-if-no-default-targets() {
  local known_host
  if ! $force && [[ $HOSTNAME != "$MAIL_HOST" ]]; then
    echo "MAIL_HOST=$MAIL_HOST, nothing to do"
    mexit 0
  fi
  for known_host in kw kd frodo x2 x3 sy so; do
    if [[ $HOSTNAME == "$known_host" ]]; then
      return 0
    fi
  done
  die "error: no default targets for this host, use -t"
}
# Append one address for x3 (main work machine) to the targets array:
# the first of the office, home (in case we took it home) and x3w names
# that answers a ping, otherwise the x3wg name without probing it.
# Globals: targets (appended to)
add-x3-target() {
  local candidate
  for candidate in x3.office.fsf.org x3.b8.nz x3w.b8.nz; do
    if ping -q -c1 -w1 "$candidate" &>/dev/null; then
      targets+=("$candidate")
      return 0
    fi
  done
  targets+=(x3wg.b8.nz)
}
# For each given host, append to targets the first of its names that
# answers a ping, tried in the order HOSTc.b8.nz (c = cabled),
# HOST.b8.nz, HOSTw.b8.nz (w = wireless). A host with no reachable name
# is left out.
# Globals:   targets (appended to); h (read when no argument is given)
# Arguments: host short names; when the first is empty, $h is used
add-wireless-target() {
  local host suffix
  if [[ -z $1 ]]; then
    set -- $h
  fi
  for host; do
    for suffix in c "" w; do
      if ping -q -c1 -w1 ${host}${suffix}.b8.nz &>/dev/null; then
        targets+=(${host}${suffix}.b8.nz)
        break
      fi
    done
  done
}
qconf() {
case $sub in
q)
# q has sensitive data i dont want to backup for so long
cat >>/etc/btrbk$conf_suf.conf < >(sed '/No such file or directory/d'); then
# shellcheck disable=SC2046 # we want word splitting
set -- $(< $once_args_file-tmp) "$@"
# i havent used this feature yet, so warn about it
echo "$0: btrbk-run options set in $once_args_file:"
cat $once_args_file-tmp
rm -f $once_args_file-tmp
fi
# Defaults for options handled in the loop below.
targets=()
early=false
fast=false
kd_spread=false
check_installed=false
# Kept untouched so the script can re-run itself with the same arguments.
orig_args=("$@")
# Normalize the arguments with getopt; on a parse error show the help and
# exit 1.
temp=$(getopt -l check-installed,fast,pull-reexec,help 23acefikl:m:npqrs:t:vh "$@") || usage 1
# Replace the positional parameters with getopt's normalized form.
eval set -- "$temp"
# Consume the normalized options one at a time. Options that take a value
# do an extra shift inside their arm; the loop stops at the -- separator.
while true; do
case $1 in
# for the rare case we want to run multiple instances at the same time
-2) conf_suf=2 ;;
-3) conf_suf=3 ;;
-a)
# all mountpoints
mountpoints=(/a /o /qr /qd /q)
;;
# only creates the config file, does not run btrbk
-c) conf_only=true ;;
--check-installed)
check_installed=true
;;
# quit early, just btrbk, no extra remounting etc.
-e) early=true ;;
# avoids some default behaviors:
# - no skipping hosts where xprintidle haven't been idle recently
# - exit if we can't ssh to 1 or more hosts
# - still set default hosts despite MAIL_HOST status
-f) force=true ;;
# skip various checks. when we run twice in a row for
# switch mail-host, no need to repeat the same checks again.
--fast) fast=true ;;
-i) incremental_strict=true ;;
# note this implies resume and -p because it is just meant to make
# other hosts have the same snapshots, not do any expiry or new
# backups.
-k) kd_spread=true ;;
# bytes per second, suffix k m g
-l) rate_limit=$2; shift ;;
# Comma separated mountpoints to backup. This has defaults set below.
-m) IFS=, mountpoints=($2); unset IFS; shift ;;
-n) dry_run=true ;;
# preserve existing snapshots and backups
-p) preserve_arg=-p ;;
# internal option for rerunning under newer SOURCE_HOST version.
--pull-reexec) pull_reexec=true;;
# quiet
-q) verbose=false; verbose_arg=; progress_arg= ;;
# source host to receive a backup from
-s)
source=$2
bbksource=$source
# a value containing a colon gets wrapped in brackets
# (presumably an IPv6 address; confirm against how bbksource is used)
if [[ $source == *:* ]]; then
bbksource="[$source]"
fi
shift
;;
# target hosts to send to. empty is valid for just doing local
# snapshot. we have default hosts we will populate.
-t) IFS=, targets=($2); unset IFS; shift ;;
# verbose.
-v)
verbose=true; verbose_arg="-l trace"
;;
-h|--help) usage ;;
--) shift; break ;;
*) die "Internal error!" ;;
esac
shift
done
# First non-option argument: the command (run, resume or archive).
cmd_arg="$1"
# Unless forced, when asked explicitly or when no source host was given,
# verify that the installed copies of our scripts match the ones under /a.
if ! $force && { $check_installed || [[ ! $source ]]; } ; then
install_bin_files=(
mount-latest-subvol
check-subvol-stale
btrbk-run
)
for f in ${install_bin_files[@]}; do
if ! diff -q /a/bin/ds/$f /usr/local/bin/$f; then
uninstalled-file-die $f
fi
done
if ! diff -q /a/bin/bash-bear-trap/bash-bear /usr/local/lib/bash-bear; then
uninstalled-file-die bash-bear
fi
# --check-installed only performs this check.
if $check_installed; then
exit 0
fi
fi
# -k: only valid on kd when kd is not MAIL_HOST. Forces resume with
# preserve and targets whichever of sy/so is reachable.
if $kd_spread; then
if [[ $cmd_arg && $cmd_arg != resume ]]; then
die "dont pass -k without resume or empty run arg"
fi
if [[ $HOSTNAME == "$MAIL_HOST" ]]; then
die "something went wrong, -k not meant to be run on MAIL_HOST"
fi
if [[ $HOSTNAME != kd ]]; then
die "something went wrong, -k only meant to run on kd"
fi
cmd_arg=resume
preserve_arg=-p
add-wireless-target sy so
fi
# The command defaults to run.
if [[ ! $cmd_arg ]]; then
cmd_arg=run
fi
# Retention settings; q_preserve is the shorter one.
# NOTE(review): where these are consumed is not visible in this part of
# the file.
std_preserve="36h 14d 8w 24m"
q_preserve="18h 14d 8w"
case $cmd_arg in
run|resume) : ;;
# This works better than the normal archive command. We have to
# specify the mount points, but that is what we are used to doing and
# we prefer it. Another difference is that archive works recursively
# and we don't care about that. Sometimes we may still want to run
# btrbk archive, but it doesn't even use the config file, so just
# run it directly, eg:
# time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk
archive)
cmd_arg=resume
std_preserve="999h 999d 999w 999m"
q_preserve="$std_preserve"
preserve_arg=-p
;;
*) die "untested command arg" ;;
esac
# Reject more than one non-option argument.
if (( $# > 1 )); then
  die "only 1 nonoption arg is supported"
fi
# Reject giving both target hosts and a source host.
if [[ -v targets && $source ]]; then
  # note, this doesnt need to be the case, but
  # we would need to think about it.
  die "error: -t and -s are mutually exclusive"
fi
### end options parsing
# remove path from earlier version of btrbk
rm -f /usr/sbin/btrbk
# Reinstall btrbk from the checkout in /a/opt/btrbk when the checkout's
# script is newer than the installed one.
# note, this still works as intended if there is no /usr/bin/btrbk
if [[ /a/opt/btrbk/btrbk -nt /usr/bin/btrbk ]]; then
if [[ -e /b/distro-functions/src/package-manager-abstractions ]]; then
. /b/distro-functions/src/package-manager-abstractions
# pi comes from the file sourced above
# (presumably a package install helper; confirm there)
pi asciidoctor
fi
cd /a/opt/btrbk
m make install
cd /
fi
# TODO: i wonder if there should be an option to send to the default
# targets, plus any given on the command line.
at_work=false
at_home=false
# set default targets
# Only when no target ended up in the targets array and no source host
# was given.
if [[ ! -v targets && ! $source ]]; then
exit-if-no-default-targets
set-location
if $at_home; then
if ! $kd_spread && [[ $HOSTNAME != x3 ]]; then
add-x3-target
fi
if [[ $HOSTNAME != kd ]]; then
targets+=(kd.b8.nz)
fi
wireless_home_hosts=(
x2
sy
so
)
for h in ${wireless_home_hosts[@]}; do
if [[ $HOSTNAME != "$h" ]]; then
# called without arguments, so it picks the host up from $h
add-wireless-target
fi
done
elif $at_work; then
targets+=(b8.nz)
for h in x2 x3 kw; do
if [[ $HOSTNAME == "$h" ]]; then
continue
fi
if ping -q -c1 -w1 $h.office.fsf.org &>/dev/null; then
targets+=($h.office.fsf.org)
fi
done
else
# location unknown: only the b8.nz target
targets+=(b8.nz)
fi
fi
# Mountpoints given on the command line must not be marked stale;
# otherwise a default list is built from host roles and /etc/fstab.
if [[ ${mountpoints[0]} ]]; then
for mp in ${mountpoints[@]}; do
if [[ -e /nocow/btrfs-stale/$mp ]]; then
die "error: $mp is stale, mount-latest-subvol first"
fi
done
else
# set default mountpoints
if [[ ${targets[0]} == tp ]]; then
prospective_mps=(/a)
else
case $HOSTNAME in
*)
prospective_mps=()
if [[ $source ]]; then
# Fetch the source host's state file plus its host name and evaluate the
# result as shell assignments.
# NOTE(review): this evals text received over ssh; it trusts the source
# host completely.
source_state="$(ssh $source 'cat /a/bin/bash_unpublished/source-state; echo source_host=$HOSTNAME')"
eval "$source_state"
# shellcheck disable=SC2154 # assigned in the above eval.
if [[ $source_host == "$MAIL_HOST" ]]; then
prospective_mps+=(/o)
fi
if [[ $source_host == "$HOST2" ]]; then
prospective_mps+=(/a /qr /qd /q)
fi
else
if [[ $HOSTNAME == "$MAIL_HOST" ]]; then
prospective_mps+=(/o)
fi
if [[ $HOSTNAME == "$HOST2" ]]; then
prospective_mps+=(/a /qr /qd /q)
fi
if $kd_spread; then
prospective_mps=(/a /o /qr /qd /q)
fi
fi
# note: put q last just in case its specific retention options were to
# affect other config sections. I havent tested if that is the case.
;;
esac
fi
for mp in ${prospective_mps[@]}; do # default mountpoints to sync
if [[ -e /nocow/btrfs-stale/$mp ]]; then
e "warning: $mp stale, not adding to default mountpoints"
continue
fi
# keep only mountpoints that appear as a mount target in /etc/fstab
if awk '{print $2}' /etc/fstab | grep -xF $mp &>/dev/null; then
mountpoints+=($mp)
fi
done
fi
# Without any mountpoint there is nothing to back up.
tmp=$(( ${#mountpoints[@]} == 0 ))
if (( tmp )); then
die didnt get mountpoint arg and had no defaults
fi
##### end command line parsing ########
#### begin pre-checks #####
# todo: this has a timing problem, since btrbk.timer could activate the service after this check.
# When pulling, refuse to run while btrbk.service is active on the source
# host (only checked when its pid 1 is systemd).
if ! $fast && [[ $source ]]; then
if [[ $(ssh $source ps --no-headers -o comm 1) == systemd ]]; then
status=$(ssh $source systemctl is-active btrbk.service) || : # normally returns 3
case $status in
inactive|failed) : ;;
*)
echo "$0: error: btrbk is running on source. exiting out of caution"
mexit 1
esac
fi
fi
if ! command -v btrbk &>/dev/null; then
die "error: no btrbk binary found"
fi
# pull_reexec stops us from getting into an infinite loop if there is some
# kind of weird problem
# pulla records whether /a is among the mountpoints.
pulla=false
for m in "${mountpoints[@]}"; do
if [[ $m == /a ]]; then
pulla=true
break
fi
done
# When pulling /a, have the source host verify its own installed scripts.
if ! $pull_reexec && [[ $source ]] && $pulla && ! $force ; then
ssh root@$source btrbk-run --check-installed
fi
#### end pre-checks #####
# print some non-default opts
# Prints, on one line separated by spaces, the options whose values
# differ from the defaults. Prints nothing unless verbose is on or when
# every option is at its default.
# Globals: verbose, conf_only, dry_run, rate_limit, cmd_arg (read),
#          opts_show (written)
show-nondefault-opts() {
  # "${opts_show[*]}" below joins on the first character of IFS.
  local IFS=' '
  if ! $verbose; then
    return 0
  fi
  opts_show=()
  # conf_only and dry_run are reported when they are on; the labels say
  # "=true", so the tests must not be negated.
  if $conf_only; then
    opts_show+=(conf_only=true)
  fi
  if $dry_run; then
    opts_show+=(dry_run=true)
  fi
  if [[ $rate_limit != no ]]; then
    opts_show+=("rate_limit=$rate_limit")
  fi
  if [[ $cmd_arg != run ]]; then
    opts_show+=("cmd_arg=$cmd_arg")
  fi
  if (( ${#opts_show[@]} >= 1 )); then
    printf '%s\n' "${opts_show[*]}"
  fi
}
show-nondefault-opts
# When pulling /a from a source host, first copy its helper scripts here.
# If its btrbk-run differs from the one running, install that version
# and run it once with the original arguments (--pull-reexec prevents
# doing this again), then exit.
if ! $pull_reexec && [[ $source ]] && $pulla ; then
  tmpf=$(mktemp)
  m rsync -ra $source:/usr/local/bin/{mount-latest-subvol,check-subvol-stale} /usr/local/bin
  m rsync -ra $source:/usr/local/lib/bash-bear /usr/local/lib
  m scp $source:/a/bin/distro-setup/btrbk-run $tmpf
  if ! diff -q $tmpf ${BASH_SOURCE[0]}; then
    e "found different version on host $source. reexecing"
    install -T $tmpf /usr/local/bin/btrbk-run
    # the downloaded copy is installed now; do not leave it behind
    rm -f -- "$tmpf"
    m /usr/local/bin/btrbk-run --pull-reexec "${orig_args[@]}"
    mexit 0
  fi
  # same version: the downloaded copy is no longer needed
  rm -f -- "$tmpf"
fi
# Show what this run will operate on.
if [[ -v targets ]]; then
echo "targets: ${targets[*]}"
fi
if [[ $source ]]; then
echo "source: $source"
fi
echo "mountpoints: ${mountpoints[*]}"
# todo: check if we have no snapshots yet, because I always want to run
# archive instead of run. Likely, I should give an error unless a cli
# override is passed. perhaps check-subvol-stale could give the error.
# see the error message "no snapshots found" in that file.
if ! $fast; then
# if our mountpoints are from stale snapshots,
# it doesn't make sense to do a backup.
m check-subvol-stale ${mountpoints[@]} || die "found stale mountpoints in ${mountpoints[*]}"
# for an initial run, btrbk requires the dir to exist.
mkdir -p /mnt/{root,o}/btrbk
fi
# Check the remote side before doing any work.
# - Pulling from a source host: its timezone must match ours.
# - Pushing to targets: each target must be reachable over ssh, be in
#   our timezone, have a root filesystem of at least 200gb that is under
#   98% used and, unless forced, not be actively in use. targets is
#   narrowed to the hosts that pass.
local_zone=$(date +%z)
if [[ $source ]]; then
  if $fast; then
    zone=$local_zone
  else
    if ! zone=$(ssh root@$source date +%z); then
      if $conf_only; then
        echo "$0: warning: failed to ssh to root@$source"
        # Only a warning is wanted in this mode, so do not let the empty
        # zone trip the timezone comparison below.
        zone=$local_zone
      else
        die failed to ssh to root@$source
      fi
    fi
    if [[ $zone != "$local_zone" ]]; then
      die "error: dont confuse yourself with multiple time zones. $source has different timezone than localhost"
    fi
  fi
else
  sshable=()
  sshfail=()
  # One ssh round trip: make sure the backup dirs exist, then print the
  # timezone on one line and root size + percent used on the next.
  remote_str_cmd="mkdir -p /mnt/root/btrbk /mnt/o/btrbk && \
date +%z && \
df --output=size,pcent / | tail -n1"
  for h in ${targets[@]}; do
    if $fast || $conf_only; then
      # Use some typical values in this case
      root_size=$(( 1024 * 1024 * 2000 )) #2tb
      percent_used=10
      zone=$(date +%z)
    elif remote_str=$(timeout -s 9 6 ssh root@$h "$remote_str_cmd"); then
      mapfile -t tmp_array <<<"$remote_str"
      zone="${tmp_array[0]}"
      IFS=" " read -r root_size percent_used <<<"${tmp_array[1]}"
      # strip the trailing % sign
      percent_used=${percent_used%%%}
      tmp=$(( ${#tmp_array[@]} != 2 ))
      if (( tmp )); then
        die "error: didnt get 2 lines in test ssh to target $h. investigate"
      fi
      # 100 is a valid value too; it must reach the "full" check below
      # rather than be reported as unparseable.
      case $percent_used in
        [0-9]|[1-9][0-9]|100) : ;;
        *)
          die "error: didnt get percent disk use in test ssh to target $h. investigate"
          ;;
      esac
    else
      sshfail+=($h)
      continue
    fi
    # we may be booted into a bootstrap fs or something
    min_root_kb=$(( 1024 * 1024 * 200 )) # 200 gb
    tmp=$(( root_size < min_root_kb ))
    if (( tmp )); then
      e "warning: $h: root_size=$root_size < 200gb, perhaps it is booted to bootstrap vol. skipping for now"
      continue
    fi
    tmp=$(( percent_used >= 98 ))
    if (( tmp )); then
      die "error: filesystem on target $h is $percent_used % full"
    fi
    # on sy, xprintidle is resetting every 12 seconds even when not
    # idle, i dunno why, instead we are checking if the screen is locked,
    # which is good enough.
    #
    # This is a separate ssh because the command can fail and that is ok.
    if ! $force; then
      locked=false
      if lock_info=$(timeout -s 9 6 ssh $h DISPLAY=:0 xscreensaver-command -time); then
        if [[ $lock_info != *non-blanked* ]]; then
          locked=true
        fi
      else
        # a failing query is treated the same as a locked screen
        locked=true
      fi
      if ! $locked; then
        # Ignore this host. i sometimes use a non-main machine for
        # testing or web browsing, knowing that everything will be wiped
        # by the next backup, but I dont want it to happen as Im using
        # it from cronjob.
        e "warning: $h: seems to be actively in use, skipping for now"
        continue
      fi
    fi
    sshable+=($h)
    if [[ $zone != "$local_zone" ]]; then
      die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost"
    fi
  done
  # Give up when no host is usable, or when forced and any ssh failed.
  if [[ ! ${sshable[*]} ]] || { $force && [[ ${sshfail[*]} ]]; }; then
    die "see skipped host warning above or sshfail hosts: ${sshfail[*]}"
  else
    if [[ ${sshfail[*]} ]]; then
      ret=1
      e "error: failed to ssh to ${sshfail[*]} but continuing with other hosts"
    fi
    targets=(${sshable[@]})
  fi
fi
cat >/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf < 1 && tg_snap_count == orphan_mp_count ))
if (( tmp )) ; then
die "something went wrong checking orphans on $tg: for mountpoint ${mountpoints[$i]}, $orphan_mp_count"
fi
done
}
# Delete the snapshots that get-orphan-tg-snaps collects in
# orphan_tg_snaps: locally when pulling from a source host, over ssh on
# each target when pushing. Deletions go through d, so they are only
# printed in dry-run/conf-only mode.
# NOTE(review): snap_list_cmds, source_snaps and get-orphan-tg-snaps are
# set up in a part of the file that is not readable from here.
if [[ $source ]]; then
for snap in $(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}"); do
source_snaps[$snap]=t
done
get-orphan-tg-snaps
tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
if (( tmp )); then
d btrfs sub del ${orphan_tg_snaps[*]}
fi
else # we have targets
for tg in ${targets[@]}; do
tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}")
mapfile -t tg_snaps <<<"$tmp_str"
get-orphan-tg-snaps
tmp=$(( ${#orphan_tg_snaps[*]} >= 1 ))
if (( tmp )); then
d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}"
fi
done
fi
# todo: umount first to ensure we don't have any errors
# todo: do some kill fuser stuff to make umount more reliable
# -c: the config file has been written; stop here.
if $conf_only; then
mexit 0
fi
# -n: let btrbk itself do a dry run (its -n flag) and stop.
if $dry_run; then
m btrbk -c /etc/btrbk$conf_suf.conf -v -n $cmd_arg
mexit 0
fi
# -q and just using the syslog option seemed nice,
# but it doesn't show when a send has a parent and when it doesn't.
# The real run; logq exits the script if a failure status is captured.
logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg
# -e: skip everything after the btrbk run.
if $early; then
exit 0
fi
# todo: tp not valid anymore.
# if we have it, sync to systems which don't
# NOTE(review): rsync_mountpoint is assigned in a part of the file that is
# not readable from here.
if mountpoint $rsync_mountpoint >/dev/null; then
for tg in ${targets[@]}; do
case $tg in
tp)
dirs=(/p/c/machine_specific/tp)
# also sync every machine_specific dir whose .hosts file lists this target
for x in /p/c/machine_specific/*.hosts; do
if grep -qxF $tg $x; then
dirs+=(${x%.hosts})
fi
done
m rsync -aSAXPH --specials --devices --delete --relative ${dirs[@]} root@$tg:/
;;
esac
done
fi
# Subvolume names are the mountpoints with the leading path removed,
# e.g. /a becomes a.
subvols=()
for mp in "${mountpoints[@]}"; do
subvols+=("${mp##*/}")
done
# Remount the latest subvolumes: here when we pulled from a source host,
# otherwise on every target via mount-latest-remote. A non-zero status
# from the remote case is recorded in ret.
if [[ $source ]]; then
d mount-latest-subvol "${subvols[@]}"
else
for tg in ${targets[@]}; do
d /a/exe/mount-latest-remote "$tg" "${subvols[@]}" || ret=$?
done
fi
# todo, we get hostnames earlier, reuse that.
if [[ $ret == 0 ]]; then
for tg in ${targets[@]}; do
h=$(ssh $tg hostname)
if [[ $h == kd && $HOSTNAME == x3 && $HOSTNAME == "$MAIL_HOST" ]]; then
d ssh root@$tg 'btrbk-spread-wrap &>/dev/null