#!/bin/bash # Configure & run btrbk & related work on Ian's computers. # Copyright (C) 2024 Ian Kelling # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # SPDX-License-Identifier: GPL-3.0-or-later # todo: if we cancel in the middle of a btrfs send, then run again # immediately, the received subvolume doesn't get a Received UUID: # field, and we won't mount it. Need to figure out a solution that will # fix this. [[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@" set -e; . /usr/local/lib/bash-bear; set +e shopt -s nullglob usage() { cat <<'EOF' btrbk-run [OPTIONS] [run|resume|archive] usually -t TARGET_HOST or -s SOURCE_HOST Note, at source location, intentionally not executable, run and read install-my-scripts. EOF echo "top of script file:" sed -n '1,/^[# ]*end command line/{p;b};q' "$0" exit $1 } pre=btrbk-run script_name="${BASH_SOURCE[0]}" script_name="${script_name##*/}" d() { if $dry_run || $conf_only; then printf "$pre dry-run: %s\n" "$*" else printf "$pre running: %s\n" "$*" "$@" fi } m() { if $verbose; then printf "$pre %s\n" "$*"; fi; "$@"; } e() { printf "$pre %s\n" "$*"; } die() { printf "$pre error: %s\n" "$*" >&2; echo "$pre exiting with status 1" >&2; exit 1; } mexit() { echo "$pre exiting with status $1"; exit $1; } uninstalled-file-die() { die "uninstalled file $1. run install-my-scripts or rerun with -f" } set-location() { case $HOSTNAME in kw) at_work=true ;; kd|frodo) at_home=true ;; x2|x3|sy) if [[ $(dig +short @10.2.0.1 -x 10.2.0.2 2>&1 ||:) == kd.b8.nz. ]] \ && ip n show 10.2.0.1 | grep . &>/dev/null; then at_home=true elif ping -q -c1 -w1 hal.office.fsf.org &>/dev/null \ && ip n show 192.168.0.26 | grep . &>/dev/null; then at_work=true fi ;; esac } exit-if-no-default-targets() { if ! $force && [[ $HOSTNAME != "$MAIL_HOST" ]]; then echo "MAIL_HOST=$MAIL_HOST, nothing to do" mexit 0 fi case $HOSTNAME in kw|kd|frodo|x2|x3|sy) : ;; *) die "error: no default targets for this host, use -t" ;; esac } add-x3-target() { # main work machine if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then targets+=(x3.office.fsf.org) elif ping -q -c1 -w1 $h.b8.nz &>/dev/null; then # in case we took it home targets+=(x3.b8.nz) elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then targets+=(x3w.b8.nz) else targets+=(x3wg.b8.nz) fi } add-wireless-target-h() { if ping -q -c1 -w1 $h.b8.nz &>/dev/null; then targets+=($h.b8.nz) elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then targets+=(${h}w.b8.nz) fi } qconf() { case $sub in q) # q has sensitive data i dont want to backup for so long cat >>/etc/btrbk$conf_suf.conf < >(sed '/No such file or directory/d'); then # shellcheck disable=SC2046 # we want word splitting set -- $(< $once_args_file-tmp) "$@" # i havent used this feature yet, so warn about it echo "$0: btrbk-run options set in $once_args_file:" cat $once_args_file-tmp rm -f $once_args_file-tmp fi targets=() early=false fast=false kd_spread=false check_installed=false orig_args=("$@") temp=$(getopt -l check-installed,fast,pull-reexec,help 23cefikl:m:npqrs:t:vh "$@") || usage 1 eval set -- "$temp" while true; do case $1 in # for the rare case we want to run multiple instances at the same time -2) conf_suf=2 ;; -3) conf_suf=3 ;; # only creates the config file, does not run btrbk -c) conf_only=true ;; --check-installed) check_installed=true ;; # quit early, just btrbk, no extra remounting etc. -e) early=true ;; # avoids some default behaviors: # - no skipping hosts where xprintidle haven't been idle recently # - exit if we can't ssh to 1 or more hosts # - still set default hosts despite MAIL_HOST status -f) force=true ;; # skip various checks. when we run twice in a row for # switch mail-host, no need to repeat the same checks again. --fast) fast=true ;; -i) incremental_strict=true ;; # note this implies resume and -p because it is just meant to make # other hosts have the same snapshots, not do any expiry or new # backups. -k) kd_spread=true ;; # bytes per second, suffix k m g -l) rate_limit=$2; shift ;; # Comma separated mountpoints to backup. This has defaults set below. -m) IFS=, mountpoints=($2); unset IFS; shift ;; -n) dry_run=true ;; # preserve existing snapshots and backups -p) preserve_arg=-p ;; # internal option for rerunning under newer SOURCE_HOST version. --pull-reexec) pull_reexec=true;; # quiet -q) verbose=false; verbose_arg=; progress_arg= ;; # source host to receive a backup from -s) source=$2 bbksource=$source if [[ $source == *:* ]]; then bbksource="[$source]" fi shift ;; # target hosts to send to. empty is valid for just doing local # snapshot. we have default hosts we will populate. -t) IFS=, targets=($2); unset IFS; shift ;; # verbose. -v) verbose=true; verbose_arg=-v ;; -h|--help) usage ;; --) shift; break ;; *) die "Internal error!" ;; esac shift done cmd_arg="$1" if ! $force && { $check_installed || [[ ! $source ]]; } ; then install_bin_files=( mount-latest-subvol check-subvol-stale btrbk-run ) for f in ${install_bin_files[@]}; do if ! diff -q /a/bin/ds/$f /usr/local/bin/$f; then uninstalled-file-die $f fi done if ! diff -q /a/bin/bash-bear-trap/bash-bear /usr/local/lib/bash-bear; then uninstalled-file-die err fi if $check_installed; then exit 0 fi fi if $kd_spread; then if [[ $cmd_arg && $cmd_arg != resume ]]; then die "dont pass -k without resume or empty run arg" fi if [[ $HOSTNAME == "$MAIL_HOST" ]]; then die "something went wrong, -k not meant to be run on MAIL_HOST" fi if [[ $HOSTNAME != kd ]]; then die "something went wrong, -k only meant to run on kd" fi cmd_arg=resume preserve_arg=-p h=sy add-wireless-target-h fi if [[ ! $cmd_arg ]]; then cmd_arg=run fi std_preserve="36h 14d 8w 24m" q_preserve="18h 14d 8w" case $cmd_arg in run|resume) : ;; # This works better than the normal archive command. We have to # specify the mount points, but that is what we are used to doing and # we prefer it. Another difference is that archive works recursively # and we don't care about that. Sometimes we may still want to run # btrbk archive, but it doesn't even use the config file, so just # run it directly, eg: # time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk archive) cmd_arg=resume std_preserve="999h 999d 999w 999m" q_preserve="$std_preserve" preserve_arg=-p ;; *) die "untested command arg" ;; esac if (( $# > 1 )); then die: "only 1 nonoption arg is supported" fi if [[ -v targets && $source ]]; then # note, this doesnt need to be the case, but # we would need to think about it. die "error: -t and -s are mutually exclusive" fi ### end options parsing # remove path from earlier version of btrbk rm -f /usr/sbin/btrbk # note, this still works as intended if there is no /usr/bin/btrbk if [[ /a/opt/btrbk/btrbk -nt /usr/bin/btrbk ]]; then if [[ -e /b/distro-functions/src/package-manager-abstractions ]]; then . /b/distro-functions/src/package-manager-abstractions pi asciidoctor fi cd /a/opt/btrbk m make install cd / fi # TODO: i wonder if there should be an option to send to the default # targets, plus any given on the command line. at_work=false at_home=false # set default targets if [[ ! -v targets && ! $source ]]; then exit-if-no-default-targets set-location if $at_home; then if ! $kd_spread && [[ $HOSTNAME != x3 ]]; then add-x3-target fi if [[ $HOSTNAME != kd ]]; then targets+=(kd.b8.nz) fi wireless_home_hosts=( x2 sy ) for h in ${wireless_home_hosts[@]}; do if [[ $HOSTNAME != "$h" ]]; then add-wireless-target-h fi done elif $at_work; then targets+=(i.b8.nz) for h in x2 x3 kw; do if [[ $HOSTNAME == "$h" ]]; then continue fi if ping -q -c1 -w1 $h.office.fsf.org &>/dev/null; then targets+=($h.office.fsf.org) fi done else targets+=(i.b8.nz) fi fi if [[ ${mountpoints[0]} ]]; then for mp in ${mountpoints[@]}; do if [[ -e /nocow/btrfs-stale/$mp ]]; then die "error: $mp is stale, mount-latest-subvol first" fi done else # set default mountpoints if [[ ${targets[0]} == tp ]]; then prospective_mps=(/a) else case $HOSTNAME in *) prospective_mps=() if [[ $source ]]; then source_state="$(ssh $source 'cat /a/bin/bash_unpublished/source-state; echo source_host=$HOSTNAME')" eval "$source_state" # shellcheck disable=SC2154 # assigned in the above eval. if [[ $source_host == "$MAIL_HOST" ]]; then prospective_mps+=(/o) fi if [[ $source_host == "$HOST2" ]]; then prospective_mps+=(/a /qr /qd /q) fi else if [[ $HOSTNAME == "$MAIL_HOST" ]]; then prospective_mps+=(/o) fi if [[ $HOSTNAME == "$HOST2" ]]; then prospective_mps+=(/a /qr /qd /q) fi if $kd_spread; then prospective_mps=(/a /o /qr /qd /q) fi fi # note: put q last just in case its specific retention options were to # affect other config sections. I havent tested if that is the case. ;; esac fi for mp in ${prospective_mps[@]}; do # default mountpoints to sync if [[ -e /nocow/btrfs-stale/$mp ]]; then e "warning: $mp stale, not adding to default mountpoints" continue fi if awk '{print $2}' /etc/fstab | grep -xF $mp &>/dev/null; then mountpoints+=($mp) fi done fi tmp=$(( ${#mountpoints[@]} == 0 )) if (( tmp )); then die didnt get mountpoint arg and had no defaults fi ##### end command line parsing ######## #### begin pre-checks ##### # todo: this has a timing problem, since btrbk.timer could activate the service after this check. if ! $fast && [[ $source ]]; then if [[ $(ssh $source ps --no-headers -o comm 1) == systemd ]]; then status=$(ssh $source systemctl is-active btrbk.service) || : # normally returns 3 case $status in inactive|failed) : ;; *) echo "$0: error: btrbk is running on source. exiting out of caution" mexit 1 esac fi fi if ! command -v btrbk &>/dev/null; then die "error: no btrbk binary found" fi if ! $pull_reexec && [[ $source ]] && $pulla && ! $force ; then ssh root@$source btrbk-run --check-installed || exit 1 fi #### end pre-checks ##### mkdir -p /var/log/btrbk # The journal doesnt go back to my oldest backups, and I've found myself # wanting older logs. Not going to bother expiring old logs, since it is # fine if they go back years. log_path=/var/log/btrbk/$(date +%F_%T%:::z).log echo copying output to $log_path exec &> >(pee cat 'ts "%F %T"|dd of='$log_path' status=none') # print some non-default opts if $verbose; then opts_show=() if ! $conf_only; then opts_show+=(conf_only=true) fi if ! $dry_run; then opts_show+=(dry_run=true) fi if [[ $rate_limit != no ]]; then opts_show+=("rate_limit=$rate_limit") fi if [[ $cmd_arg != run ]]; then opts_show+=(cmd_arg=$cmd_arg) fi if (( ${#opts_show[@]} >= 1 )); then first=true for opt in ${opts_show[@]}; do if $first; then printf "%s" "$opt" else printf " %s" "$opt" fi done echo fi fi if [[ -v targets ]]; then echo "targets: ${targets[*]}" fi if [[ $source ]]; then echo "source: $source" fi echo "mountpoints: ${mountpoints[*]}" # pull_reexec stops us from getting into an infinite loop if there is some # kind of weird problem pulla=false for m in "${mountpoints[@]}"; do if [[ $m == /a ]]; then pulla=true break fi done if ! $pull_reexec && [[ $source ]] && $pulla ; then tmpf=$(mktemp) m rsync -ra $source:/usr/local/bin/{mount-latest-subvol,check-subvol-stale} /usr/local/bin m rsync -ra $source:/usr/local/lib/bash-bear /usr/local/lib m scp $source:/a/bin/distro-setup/btrbk-run $tmpf if ! diff -q $tmpf ${BASH_SOURCE[0]}; then e "found different version on host $source. reexecing" install -T $tmpf /usr/local/bin/btrbk-run m /usr/local/bin/btrbk-run --pull-reexec "${orig_args[@]}" mexit 0 fi fi # todo: check if we have no snapshots yet, because I always want to run # archive instead of run. Likely, I should give an error unless a cli # override is passed. perhaps check-subvol-stale could give the error. # see the error message "no snapshots found" in that file. if ! $fast; then # if our mountpoints are from stale snapshots, # it doesn't make sense to do a backup. m check-subvol-stale ${mountpoints[@]} || die "found stale mountpoints in ${mountpoints[*]}" # for an initial run, btrbk requires the dir to exist. mkdir -p /mnt/{root,o}/btrbk fi local_zone=$(date +%z) if [[ $source ]]; then if $fast; then zone=$local_zone else if ! zone=$(ssh root@$source date +%z); then if $conf_only; then echo "$0: warning: failed to ssh to root@$source" else die failed to ssh to root@$source fi fi if [[ $zone != "$local_zone" ]]; then die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost" fi fi else sshable=() sshfail=() for h in ${targets[@]}; do if $fast || $conf_only; then # Use some typical values in this case root_size=$(( 1024 * 1024 * 2000 )) #2tb percent_used=10 zone=$(date +%z) elif remote_str=$(timeout -s 9 6 ssh root@$h "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1"); then mapfile -t tmp_array <<<"$remote_str" zone="${tmp_array[0]}" IFS=" " read -r root_size percent_used <<<"${tmp_array[1]}" percent_used=${percent_used%%%} tmp=$(( ${#tmp_array[@]} != 2 )) if (( tmp )); then die "error: didnt get 2 lines in test ssh to target $h. investigate" fi case $percent_used in [0-9]|[1-9][0-9]) : ;; *) die "error: didnt get percent disk use in test ssh to target $h. investigate" ;; esac else sshfail+=($h) continue fi # we may be booted into a bootstrap fs or something min_root_kb=$(( 1024 * 1024 * 200 )) # 200 gb tmp=$(( root_size < min_root_kb )) if (( tmp )); then continue fi tmp=$(( percent_used >= 98 )) if (( tmp )); then die "error: filesystem on target $h is $percent_used % full" fi # on sy, xprintidle is resetting every 12 seconds even when not # idle, i dunno why, instead we are checking if the screen is locked, # which is good enough. # # This is a separate ssh because the command can fail and thatis ok. if ! $force; then locked=false if lock_info=$(timeout -s 9 6 ssh $h DISPLAY=:0 xscreensaver-command -time); then if [[ $lock_info != *non-blanked* ]]; then locked=true fi else locked=true fi if ! $locked; then # Ignore this host. i sometimes use a non-main machine for # testing or web browsing, knowing that everything will be wiped # by the next backup, but I dont want it to happen as Im using # it from cronjob. e "warning: $h: seems to be actively in use, skipping for now" continue fi fi sshable+=($h) if [[ $zone != "$local_zone" ]]; then die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost" fi done if [[ ! ${sshable[*]} ]] || { $force && [[ ${sshfail[*]} ]]; }; then die "failed to ssh to hosts: ${sshfail[*]}" else if [[ ${sshfail[*]} ]]; then ret=1 e "error: failed to ssh to ${sshfail[*]} but continuing with other hosts" fi targets=(${sshable[@]}) fi fi cat >/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf < 1 && tg_snap_count == orphan_mp_count )) if (( tmp )) ; then die "something went wrong checking orphans on $tg: for mountpoint ${mountpoints[$i]}, $orphan_mp_count" fi done } if [[ $source ]]; then for snap in $(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}"); do source_snaps[$snap]=t done get-orphan-tg-snaps tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) if (( tmp )); then d btrfs sub del ${orphan_tg_snaps[*]} fi else # we have targets for tg in ${targets[@]}; do tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}") mapfile -t tg_snaps <<<"$tmp_str" get-orphan-tg-snaps tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) if (( tmp )); then d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}" fi done fi # todo: umount first to ensure we don't have any errors # todo: do some kill fuser stuff to make umount more reliable if $conf_only; then mexit 0 fi if $dry_run; then m btrbk -c /etc/btrbk$conf_suf.conf -v -n $cmd_arg mexit 0 fi # -q and just using the syslog option seemed nice, # but it doesn't show when a send has a parent and when it doesn't. m btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg if $early; then exit 0 fi # todo: tp not valid anymore. # if we have it, sync to systems which don't if mountpoint $rsync_mountpoint >/dev/null; then for tg in ${targets[@]}; do case $tg in tp) dirs=(/p/c/machine_specific/tp) for x in /p/c/machine_specific/*.hosts; do if grep -qxF $tg $x; then dirs+=(${x%.hosts}) fi done m rsync -aSAXPH --specials --devices --delete --relative ${dirs[@]} root@$tg:/ ;; esac done fi subvols=() for mp in "${mountpoints[@]}"; do subvols+=("${mp##*/}") done if [[ $source ]]; then m mount-latest-subvol "${subvols[@]}" else for tg in ${targets[@]}; do m /a/exe/mount-latest-remote "$tg" "${subvols[@]}" || ret=$? done fi # todo, we get hostnames earlier, reuse that. if [[ $ret == 0 ]]; then for tg in ${targets[@]}; do h=$(ssh $tg hostname) if [[ $h == kd && $HOSTNAME == x3 && $HOSTNAME == "$MAIL_HOST" ]]; then m ssh root@$tg 'btrbk-spread-wrap &>/dev/null