X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=btrbk-run;h=c4d84616a9c3427391e9a095e65ccd14d369bcac;hb=768363d8771edb9d9ed82425fa772d77b90139c0;hp=f768ad4ffff561c65bf1d67ac3d8d9a02a9b74d6;hpb=541688fb965b26a16cda0801591df64551c51115;p=distro-setup diff --git a/btrbk-run b/btrbk-run index f768ad4..c4d8461 100644 --- a/btrbk-run +++ b/btrbk-run @@ -39,14 +39,96 @@ EOF } +pre=btrbk-run script_name="${BASH_SOURCE[0]}" script_name="${script_name##*/}" -pre="${SSH_CLIENT:+$HOSTNAME} $script_name:" -m() { if $verbose; then printf "$pre%s\n" "$*"; fi; "$@"; } -e() { printf "$pre%s\n" "$*"; } -die() { printf "$pre%s\n" "$*" >&2; echo "exiting with status 1" >&2; exit 1; } -mexit() { echo "$pre: exiting with status $1"; exit $1; } +d() { + if $dry_run || $conf_only; then + printf "$pre dry-run: %s\n" "$*" + else + printf "$pre running: %s\n" "$*" + "$@" + fi +} +m() { if $verbose; then printf "$pre %s\n" "$*"; fi; "$@"; } +e() { printf "$pre %s\n" "$*"; } +die() { printf "$pre error: %s\n" "$*" >&2; echo "$pre exiting with status 1" >&2; exit 1; } +mexit() { echo "$pre exiting with status $1"; exit $1; } + +uninstalled-file-die() { + die "uninstalled file $1. run install-my-scripts or rerun with -f" +} + +set-location() { + case $HOSTNAME in + kw) + at_work=true + ;; + kd|frodo) + at_home=true + ;; + x2|x3|sy) + if [[ $(dig +short @10.2.0.1 -x 10.2.0.2 2>&1 ||:) == kd.b8.nz. ]] \ + && ip n show 10.2.0.1 | grep . &>/dev/null; then + at_home=true + elif ping -q -c1 -w1 hal.office.fsf.org &>/dev/null \ + && ip n show 192.168.0.26 | grep . &>/dev/null; then + at_work=true + fi + ;; + esac +} + +exit-if-no-default-targets() { + if ! $force && [[ $HOSTNAME != "$MAIL_HOST" ]]; then + echo "MAIL_HOST=$MAIL_HOST, nothing to do" + mexit 0 + fi + case $HOSTNAME in + kw|kd|frodo|x2|x3|sy) : ;; + *) + die "error: no default targets for this host, use -t" + ;; + esac +} + +add-x3-target() { + # main work machine + if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then + targets+=(x3.office.fsf.org) + elif ping -q -c1 -w1 $h.b8.nz &>/dev/null; then + # in case we took it home + targets+=(x3.b8.nz) + else + targets+=(x3wg.b8.nz) + fi +} + +add-wireless-target-h() { + if ping -q -c1 -w1 $h.b8.nz &>/dev/null; then + targets+=($h.b8.nz) + elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then + targets+=(${h}w.b8.nz) + fi +} + +qconf() { + case $sub in + q) + # q has sensitive data i dont want to backup for so long + cat >>/etc/btrbk$conf_suf.conf < >(sed '/No such file or directory/d'); then + # shellcheck disable=SC2046 # we want word splitting + set -- $(< $once_args_file-tmp) "$@" + # i havent used this feature yet, so warn about it + echo "$0: btrbk-run options set in $once_args_file:" + cat $once_args_file-tmp + rm -f $once_args_file-tmp +fi + + targets=() early=false -cron=false fast=false -kd_spread_maybe=false +kd_spread=false +check_installed=false orig_args=("$@") -temp=$(getopt -l cron,fast,pull-reexec,help 23ceikl:m:npqrs:t:vh "$@") || usage 1 +temp=$(getopt -l check-installed,fast,pull-reexec,help 23cefikl:m:npqrs:t:vh "$@") || usage 1 eval set -- "$temp" while true; do case $1 in - # some behaviors specific to running under cron: - # - skip hosts where xprintidle haven't been idle recently - # - if we can't ssh to 1 or more hosts, still do the rest - # - if we aren't MAIL_HOST and no -m or -s, just exit - --cron) - cron=true - pre= - ;; # for the rare case we want to run multiple instances at the same time -2) conf_suf=2 ;; -3) conf_suf=3 ;; # only creates the config file, does not run btrbk -c) conf_only=true ;; + --check-installed) + check_installed=true + ;; # quit early, just btrbk, no extra remounting etc. -e) early=true ;; + # avoids some default behaviors: + # - no skipping hosts where xprintidle haven't been idle recently + # - exit if we can't ssh to 1 or more hosts + # - still set default hosts despite MAIL_HOST status + -f) force=true ;; # skip various checks. when we run twice in a row for # switch mail-host, no need to repeat the same checks again. --fast) fast=true ;; -i) incremental_strict=true ;; - # note this implies resume - -k) kd_spread_maybe=true ;; + # note this implies resume and -p because it is just meant to make + # other hosts have the same snapshots, not do any expiry or new + # backups. + -k) kd_spread=true ;; # bytes per second, suffix k m g -l) rate_limit=$2; shift ;; # Comma separated mountpoints to backup. This has defaults set below. -m) IFS=, mountpoints=($2); unset IFS; shift ;; -n) dry_run=true ;; - # hide progress - -p) progress_arg= ;; + # preserve existing snapshots and backups + -p) preserve_arg=-p ;; # internal option for rerunning under newer SOURCE_HOST version. --pull-reexec) pull_reexec=true;; # quiet @@ -148,15 +244,49 @@ while true; do shift done -if $kd_spread_maybe; then - if [[ $1 && $1 != resume ]]; then +cmd_arg="$1" + + + +if ! $force && { $check_installed || [[ ! $source ]]; } ; then + install_bin_files=( + mount-latest-subvol + check-subvol-stale + btrbk-run + ) + for f in ${install_bin_files[@]}; do + if ! diff -q /a/bin/ds/$f /usr/local/bin/$f; then + uninstalled-file-die $f + fi + done + if ! diff -q /a/bin/errhandle/err /usr/local/lib/err; then + uninstalled-file-die err + fi + if $check_installed; then + exit 0 + fi +fi + + +if $kd_spread; then + if [[ $cmd_arg && $cmd_arg != resume ]]; then die "dont pass -k without resume or empty run arg" fi + if [[ $HOSTNAME == "$MAIL_HOST" ]]; then + die "something went wrong, -k not meant to be run on MAIL_HOST" + fi + if [[ $HOSTNAME != kd ]]; then + die "something went wrong, -k only meant to run on kd" + fi cmd_arg=resume + preserve_arg=-p + h=sy + add-wireless-target-h fi - -cmd_arg=${1:-run} +if [[ ! $cmd_arg ]]; then + cmd_arg=run +fi std_preserve="36h 14d 8w 24m" @@ -209,127 +339,43 @@ fi # targets, plus any given on the command line. +at_work=false +at_home=false + -kd_spread=false # set default targets if [[ ! -v targets && ! $source ]]; then - if $cron; then - if [[ $HOSTNAME != "$MAIL_HOST" ]]; then - if $kd_spread_maybe && [[ $HOSTNAME == kd && $MAIL_HOST == x3 ]]; then - if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then - work_host=x3.office.fsf.org - elif ping -q -c1 -w1 x3wg.b8.nz &>/dev/null; then - work_host=x3wg.b8.nz - fi - if [[ $work_host ]]; then - source_state="$(ssh $work_host cat /a/bin/bash_unpublished/source-state)" - eval "$source_state" - if [[ $MAIL_HOST == x3 ]]; then - kd_spread=true - else - echo "MAIL_HOST=$MAIL_HOST, nothing to do" - mexit 0 - fi - else - echo "MAIL_HOST=$MAIL_HOST, nothing to do" - mexit 0 - fi - else - echo "MAIL_HOST=$MAIL_HOST, nothing to do" - mexit 0 - fi + exit-if-no-default-targets + set-location + if $at_home; then + if ! $kd_spread && [[ $HOSTNAME != x3 ]]; then + add-x3-target fi - fi - - at_work=false - at_home=false - - case $HOSTNAME in - kw|kd|frodo|x2|x3|sy) : ;; - *) - die "error: no default targets for this host, use -t" - ;; - esac - - case $HOSTNAME in - kw) - at_work=true - ;;& - kd|frodo) - at_home=true - ;;& - x2|x3|sy) - if [[ $(dig +short @10.2.0.1 -x 10.2.0.2 2>&1 ||:) == kd.b8.nz. ]] \ - && ip n show 10.2.0.1 | grep . &>/dev/null; then - at_home=true - elif ping -q -c1 -w1 hal.office.fsf.org &>/dev/null \ - && ip n show 192.168.0.26 | grep . &>/dev/null; then - at_work=true + if [[ $HOSTNAME != kd ]]; then + targets+=(kd.b8.nz) + fi + wireless_home_hosts=( + x2 + sy + ) + for h in ${wireless_home_hosts[@]}; do + if [[ $HOSTNAME != "$h" ]]; then + add-wireless-target-h fi - ;;& - *) - if $at_home; then - if ! $kd_spread && [[ $HOSTNAME != x3 ]]; then - # main work machine - if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then - targets+=(x3.office.fsf.org) - elif ping -q -c1 -w1 $h.b8.nz &>/dev/null; then - # in case we took it home - targets+=(x3.b8.nz) - else - targets+=(x3wg.b8.nz) - fi - fi - # temporarily disabled while doing recovery - # for h in frodo kd; do - for h in kd; do - if [[ $HOSTNAME == "$h" ]]; then - continue - fi - targets+=($h.b8.nz) - done - for h in x2 sy; do - if [[ $HOSTNAME == "$h" ]]; then - continue - fi - if ping -q -c1 -w1 $h.b8.nz &>/dev/null; then - targets+=($h.b8.nz) - elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then - targets+=(${h}w.b8.nz) - fi - done - elif $at_work; then - if ping -q -c1 -w1 iank.vpn.office.fsf.org &>/dev/null; then - targets+=(iank.vpn.office.fsf.org) - else - targets+=(i.b8.nz) - fi - for h in x2 x3 kw; do - if [[ $HOSTNAME == "$h" ]]; then - continue - fi - if ping -q -c1 -w1 $h.office.fsf.org &>/dev/null; then - targets+=($h.office.fsf.org) - fi - done - else - targets+=(i.b8.nz) + done + elif $at_work; then + targets+=(i.b8.nz) + for h in x2 x3 kw; do + if [[ $HOSTNAME == "$h" ]]; then + continue fi - ;; - esac -fi - -if $verbose; then - printf "$pre options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg" -fi - - -if [[ -v targets ]]; then - echo "targets: ${targets[*]}" -fi - -if [[ $source ]]; then - echo "source: $source" + if ping -q -c1 -w1 $h.office.fsf.org &>/dev/null; then + targets+=($h.office.fsf.org) + fi + done + else + targets+=(i.b8.nz) + fi fi if [[ ${mountpoints[0]} ]]; then @@ -347,24 +393,24 @@ else *) prospective_mps=() if [[ $source ]]; then - source_state="$(ssh $source cat /a/bin/bash_unpublished/source-state)" + source_state="$(ssh $source 'cat /a/bin/bash_unpublished/source-state; echo source_host=$HOSTNAME')" eval "$source_state" - source_host="$(ssh $source cat /etc/hostname)" + # shellcheck disable=SC2154 # assigned in the above eval. if [[ $source_host == "$MAIL_HOST" ]]; then prospective_mps+=(/o) fi if [[ $source_host == "$HOST2" ]]; then - prospective_mps+=(/a /ar /qr /q) + prospective_mps+=(/a /ar /qr /qd /q) fi else if [[ $HOSTNAME == "$MAIL_HOST" ]]; then prospective_mps+=(/o) fi if [[ $HOSTNAME == "$HOST2" ]]; then - prospective_mps+=(/a /ar /qr /q) + prospective_mps+=(/a /ar /qr /qd /q) fi if $kd_spread; then - prospective_mps=(/a /ar /o /qr /q) + prospective_mps=(/a /ar /o /qr /qd /q) fi fi # note: put q last just in case its specific retention options were to @@ -383,14 +429,15 @@ else done fi -if (( ! ${#mountpoints[@]} )); then +tmp=$(( ${#mountpoints[@]} == 0 )) +if (( tmp )); then die didnt get mountpoint arg and had no defaults fi -echo "mountpoints: ${mountpoints[*]}" - ##### end command line parsing ######## +#### begin pre-checks ##### + # todo: this has a timing problem, since btrbk.timer could activate the service after this check. if ! $fast && [[ $source ]]; then if [[ $(ssh $source ps --no-headers -o comm 1) == systemd ]]; then @@ -398,12 +445,50 @@ if ! $fast && [[ $source ]]; then case $status in inactive|failed) : ;; *) - echo "$0: error: cron btrbk is running on source. exiting out of caution" + echo "$0: error: btrbk is running on source. exiting out of caution" mexit 1 esac fi fi +if ! command -v btrbk &>/dev/null; then + die "error: no btrbk binary found" +fi + +if ! $pull_reexec && [[ $source ]] && $pulla && ! $force ; then + ssh root@$source btrbk-run --check-installed || exit 1 +fi + +#### end pre-checks ##### + + + +mkdir -p /var/log/btrbk +# The journal doesnt go back to my oldest backups, and I've found myself +# wanting older logs. Not going to bother expiring old logs, since it is +# fine if they go back years. +log_path=/var/log/btrbk/$(date +%F_%T%:::z).log +echo copying output to $log_path +exec &> >(pee cat 'ts "%F %T"|dd of='$log_path' status=none') + + +if $verbose; then + printf " options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s\n" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg" +fi + +if [[ -v targets ]]; then + echo "targets: ${targets[*]}" +fi + +if [[ $source ]]; then + echo "source: $source" +fi + +echo "mountpoints: ${mountpoints[*]}" + + + + # pull_reexec stops us from getting into an infinite loop if there is some # kind of weird problem pulla=false @@ -413,6 +498,7 @@ for m in "${mountpoints[@]}"; do break fi done + if ! $pull_reexec && [[ $source ]] && $pulla ; then tmpf=$(mktemp) m rsync -ra $source:/usr/local/bin/{mount-latest-subvol,check-subvol-stale} /usr/local/bin @@ -427,10 +513,6 @@ if ! $pull_reexec && [[ $source ]] && $pulla ; then fi -if ! command -v btrbk &>/dev/null; then - die "error: no btrbk binary found" -fi - if ! $fast; then # if our mountpoints are from stale snapshots, # it doesn't make sense to do a backup. @@ -472,7 +554,8 @@ else IFS=" " read -r root_size percent_used <<<"${tmp_array[1]}" percent_used=${percent_used%%%} - if (( ${#tmp_array[@]} != 2 )); then + tmp=$(( ${#tmp_array[@]} != 2 )) + if (( tmp )); then die "error: didnt get 2 lines in test ssh to target $h. investigate" fi case $percent_used in @@ -488,11 +571,13 @@ else # we may be booted into a bootstrap fs or something min_root_kb=$(( 1024 * 1024 * 200 )) # 200 gb - if (( root_size < min_root_kb )); then + tmp=$(( root_size < min_root_kb )) + if (( tmp )); then continue fi - if (( percent_used >= 98 )); then + tmp=$(( percent_used >= 98 )) + if (( tmp )); then die "error: filesystem on target $h is $percent_used % full" fi @@ -501,7 +586,7 @@ else # which is good enough. # # This is a separate ssh because the command can fail and thatis ok. - if $cron; then + if ! $force; then locked=false if lock_info=$(timeout -s 9 6 ssh $h DISPLAY=:0 xscreensaver-command -time); then if [[ $lock_info != *non-blanked* ]]; then @@ -510,7 +595,7 @@ else else locked=true fi - if $locked; then + if ! $locked; then # Ignore this host. i sometimes use a non-main machine for # testing or web browsing, knowing that everything will be wiped # by the next backup, but I dont want it to happen as Im using @@ -524,7 +609,7 @@ else die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost" fi done - if [[ ! ${sshable[*]} ]] || { ! $cron && [[ ${sshfail[*]} ]]; }; then + if [[ ! ${sshable[*]} ]] || { $force && [[ ${sshfail[*]} ]]; }; then die "failed to ssh to hosts: ${sshfail[*]}" else if [[ ${sshfail[*]} ]]; then @@ -563,14 +648,18 @@ snapshot_create onchange # if one disk had less space. # for now, keeping them equal. snapshot_preserve $std_preserve -snapshot_preserve_min 2h +snapshot_preserve_min 6h snapshot_dir btrbk # so, total backups = ~58 target_preserve $std_preserve -target_preserve_min 2h +target_preserve_min 6h -# i tried this when investigating: clone no source subvolume found error -#incremental_prefs sro:1 srn:1 sao san:1 aro:1 arn:1 +# it seems very likely that not doing this could result in clone source not found +# errors, for example when expiry happens differently on different hosts, +# also, as btrbk does by default, if a failed send happens, on the next run it +# will warn about a stray subvolume, but then create a backup of a newer subvol +# and use an older subvol as the parent. +incremental_prefs sao:1 # if something fails and it's not obvious, try doing # btrbk -l debug -v dryrun @@ -584,21 +673,6 @@ incremental strict EOF fi -qconf() { - case $sub in - q) - # q has sensitive data i dont want to backup for so long - cat >>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf < 1 && tg_snap_count == orphan_mp_count )) + if (( tmp )) ; then + die "something went wrong checking orphans on $tg: for mountpoint ${mountpoints[$i]}, $orphan_mp_count" + fi + done +} + +if [[ $source ]]; then + for snap in $(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}"); do + source_snaps[$snap]=t + done + get-orphan-tg-snaps + tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) + if (( tmp )); then + d btrfs sub del ${orphan_tg_snaps[*]} + fi +else # we have targets + for tg in ${targets[@]}; do + tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}") + mapfile -t tg_snaps <<<"$tmp_str" + get-orphan-tg-snaps + tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) + if (( tmp )); then + d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}" + fi + done +fi + # todo: umount first to ensure we don't have any errors # todo: do some kill fuser stuff to make umount more reliable @@ -700,11 +835,19 @@ else done fi +# todo, we get hostnames earlier, reuse that. if [[ $ret == 0 ]]; then for tg in ${targets[@]}; do - : - #ssh root@$tg /a/exe/mail-backup-clean + h=$(ssh $tg hostname) + if [[ $h == kd && $HOSTNAME == x3 && $HOSTNAME == "$MAIL_HOST" ]]; then + m ssh root@$tg 'btrbk-spread-wrap &>/dev/null