X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=btrbk-run;h=5388858b946c44640ed18fa25285acfbdcb296ac;hb=dc34d23c63cd83a7cc7a79525445aad3293c7241;hp=50e64b811f0809d2a9a09161534f5750d2e1b46a;hpb=9c77c557e60d21caceeef1e78e35b35ed968fca9;p=distro-setup diff --git a/btrbk-run b/btrbk-run index 50e64b8..5388858 100644 --- a/btrbk-run +++ b/btrbk-run @@ -77,6 +77,7 @@ pull_reexec=false default_args_file=/etc/btrbk-run.conf if [[ -s $default_args_file ]]; then + # shellcheck disable=SC2046 # we want word splitting set -- $(< $default_args_file) "$@" # i havent used this feature yet, so warn about it echo "$0: warning: default btrbk-run options set in $default_args_file (sleeping 5 seconds):" @@ -87,8 +88,10 @@ fi targets=() early=false cron=false +fast=false +kd_spread_maybe=false orig_args=("$@") -temp=$(getopt -l cron,pull-reexec,help 23ceil:m:npqrs:t:vh "$@") || usage 1 +temp=$(getopt -l cron,fast,pull-reexec,help 23ceikl:m:npqrs:t:vh "$@") || usage 1 eval set -- "$temp" while true; do case $1 in @@ -107,7 +110,12 @@ while true; do -c) conf_only=true ;; # quit early, just btrbk, no extra remounting etc. -e) early=true ;; + # skip various checks. when we run twice in a row for + # switch mail-host, no need to repeat the same checks again. + --fast) fast=true ;; -i) incremental_strict=true ;; + # note this implies resume + -k) kd_spread_maybe=true ;; # bytes per second, suffix k m g -l) rate_limit=$2; shift ;; # Comma separated mountpoints to backup. This has defaults set below. @@ -140,7 +148,18 @@ while true; do shift done -cmd_arg=${1:-run} +cmd_arg="$1" + +if $kd_spread_maybe; then + if [[ $cmd_arg && $cmd_arg != resume ]]; then + die "dont pass -k without resume or empty run arg" + fi + cmd_arg=resume +fi + +if [[ ! 
$cmd_arg ]]; then + cmd_arg=run +fi std_preserve="36h 14d 8w 24m" @@ -175,9 +194,6 @@ if [[ -v targets && $source ]]; then die "error: -t and -s are mutually exclusive" fi -if $verbose; then - printf "$pre options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg" -fi ### end options parsing # remove path from earlier version of btrbk @@ -202,8 +218,27 @@ kd_spread=false if [[ ! -v targets && ! $source ]]; then if $cron; then if [[ $HOSTNAME != "$MAIL_HOST" ]]; then - if [[ $HOSTNAME == kd && $MAIL_HOST = x2 ]]; then - kd_spread=true + if $kd_spread_maybe && [[ $HOSTNAME == kd && $MAIL_HOST == x3 ]]; then + if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then + work_host=x3.office.fsf.org + elif ping -q -c1 -w1 x3wg.b8.nz &>/dev/null; then + work_host=x3wg.b8.nz + fi + if [[ $work_host ]]; then + source_state="$(ssh $work_host cat /a/bin/bash_unpublished/source-state)" + eval "$source_state" + if [[ $MAIL_HOST == x3 ]]; then + kd_spread=true + else + # x3 was the mail host, but it moved to some other machine + # without updating us yet. + echo "MAIL_HOST=$MAIL_HOST, nothing to do" + mexit 0 + fi + else + echo "MAIL_HOST=$MAIL_HOST, nothing to do" + mexit 0 + fi else echo "MAIL_HOST=$MAIL_HOST, nothing to do" mexit 0 @@ -211,9 +246,6 @@ if [[ ! -v targets && ! $source ]]; then fi fi - # x2 at home atm - kd_spread=false - at_work=false at_home=false @@ -242,19 +274,26 @@ if [[ ! -v targets && ! $source ]]; then ;;& *) if $at_home; then - # main work machine - if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then - targets+=(x3.office.fsf.org) - else - targets+=(x3wg.b8.nz) + if ! 
$kd_spread && [[ $HOSTNAME != x3 ]]; then + # main work machine + if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then + targets+=(x3.office.fsf.org) + elif ping -q -c1 -w1 $h.b8.nz &>/dev/null; then + # in case we took it home + targets+=(x3.b8.nz) + else + targets+=(x3wg.b8.nz) + fi fi - for h in frodo kd; do + # temporarily disabled while doing recovery + # for h in frodo kd; do + for h in kd; do if [[ $HOSTNAME == "$h" ]]; then continue fi targets+=($h.b8.nz) done - for h in x2 x3 sy; do + for h in x2 sy; do if [[ $HOSTNAME == "$h" ]]; then continue fi @@ -265,11 +304,7 @@ if [[ ! -v targets && ! $source ]]; then fi done elif $at_work; then - if ping -q -c1 -w1 iank.vpn.office.fsf.org &>/dev/null; then - targets+=(iank.vpn.office.fsf.org) - else - targets+=(i.b8.nz) - fi + targets+=(i.b8.nz) for h in x2 x3 kw; do if [[ $HOSTNAME == "$h" ]]; then continue @@ -285,14 +320,6 @@ if [[ ! -v targets && ! $source ]]; then esac fi -if [[ -v targets ]]; then - echo "targets: ${targets[*]}" -fi - -if [[ $source ]]; then - echo "source: $source" -fi - if [[ ${mountpoints[0]} ]]; then for mp in ${mountpoints[@]}; do if [[ -e /nocow/btrfs-stale/$mp ]]; then @@ -324,6 +351,9 @@ else if [[ $HOSTNAME == "$HOST2" ]]; then prospective_mps+=(/a /ar /qr /q) fi + if $kd_spread; then + prospective_mps=(/a /ar /o /qr /q) + fi fi # note: put q last just in case its specific retention options were to # affect other config sections. I havent tested if that is the case. @@ -341,11 +371,16 @@ else done fi -echo "mountpoints: ${mountpoints[*]}" +if (( ! ${#mountpoints[@]} )); then + die didnt get mountpoint arg and had no defaults +fi ##### end command line parsing ######## -if [[ $source ]]; then +#### begin pre-checks ##### + +# todo: this has a timing problem, since btrbk.timer could activate the service after this check. +if ! 
$fast && [[ $source ]]; then if [[ $(ssh $source ps --no-headers -o comm 1) == systemd ]]; then status=$(ssh $source systemctl is-active btrbk.service) || : # normally returns 3 case $status in @@ -357,6 +392,38 @@ if [[ $source ]]; then fi fi +if ! command -v btrbk &>/dev/null; then + die "error: no btrbk binary found" +fi + + +#### end pre-checks ##### + +mkdir -p /var/log/btrbk +# The journal doesnt go back to my oldest backups, and I've found myself +# wanting older logs. Not going to bother expiring old logs, since it is +# fine if they go back years. +log_path=/var/log/btrbk/$(date +%F_%T%:::z).log +echo copying output to $log_path +exec &> >(ts "%F %T" | tee -a $log_path) + + +if $verbose; then + printf "$pre options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg" +fi + +if [[ -v targets ]]; then + echo "targets: ${targets[*]}" +fi + +if [[ $source ]]; then + echo "source: $source" +fi + +echo "mountpoints: ${mountpoints[*]}" + + + # pull_reexec stops us from getting into an infinite loop if there is some # kind of weird problem pulla=false @@ -380,48 +447,56 @@ if ! $pull_reexec && [[ $source ]] && $pulla ; then fi -if ! command -v btrbk &>/dev/null; then - die "error: no btrbk binary found" -fi -# if our mountpoints are from stale snapshots, -# it doesn't make sense to do a backup. -m check-subvol-stale ${mountpoints[@]} || die "found stale mountpoints in ${mountpoints[*]}" +if ! $fast; then + # if our mountpoints are from stale snapshots, + # it doesn't make sense to do a backup. + m check-subvol-stale ${mountpoints[@]} || die "found stale mountpoints in ${mountpoints[*]}" -# for an initial run, btrbk requires the dir to exist. -mkdir -p /mnt/{root,o}/btrbk + # for an initial run, btrbk requires the dir to exist. + mkdir -p /mnt/{root,o}/btrbk +fi local_zone=$(date +%z) if [[ $source ]]; then - if ! 
zone=$(ssh root@$source date +%z); then - if $conf_only; then - echo "$0: warning: failed to ssh to root@$source" - else - die failed to ssh to root@$source + if $fast; then + zone=$local_zone + else + if ! zone=$(ssh root@$source date +%z); then + if $conf_only; then + echo "$0: warning: failed to ssh to root@$source" + else + die failed to ssh to root@$source + fi + fi + if [[ $zone != "$local_zone" ]]; then + die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost" fi fi - if [[ $zone != "$local_zone" ]]; then - die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost" - fi - else sshable=() sshfail=() - min_idle_ms=$((1000 * 60 * 15)) for h in ${targets[@]}; do - if remote_info=( $(timeout -s 9 6 ssh root@$h "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1") ); then - zone=${remote_info[0]} - root_size=${remote_info[1]} - percent_used=${remote_info[2]%%%} - - if (( ${#remote_info[@]} != 3 )); then - die "error: didnt get 3 fields in test ssh to target $h. investigate" - fi - elif $conf_only; then + if $fast || $conf_only; then # Use some typical values in this case root_size=$(( 1024 * 1024 * 2000 )) #2tb percent_used=10 zone=$(date +%z) + elif remote_str=$(timeout -s 9 6 ssh root@$h "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1"); then + mapfile -t tmp_array <<<"$remote_str" + zone="${tmp_array[0]}" + IFS=" " read -r root_size percent_used <<<"${tmp_array[1]}" + percent_used=${percent_used%%%} + + if (( ${#tmp_array[@]} != 2 )); then + die "error: didnt get 2 lines in test ssh to target $h. investigate" + fi + case $percent_used in + [0-9]|[1-9][0-9]) : ;; + *) + die "error: didnt get percent disk use in test ssh to target $h. 
investigate" + ;; + esac else sshfail+=($h) continue @@ -437,15 +512,26 @@ else die "error: filesystem on target $h is $percent_used % full" fi - # This is a separate ssh because xprintidle can fail and thats ok. - if $cron && idle_ms=$(timeout -s 9 6 ssh $h DISPLAY=:0 xprintidle); then - if (( idle_ms < min_idle_ms )); then - + # on sy, xprintidle is resetting every 12 seconds even when not + # idle, i dunno why, instead we are checking if the screen is locked, + # which is good enough. + # + # This is a separate ssh because the command can fail and that is ok. + if $cron; then + locked=false + if lock_info=$(timeout -s 9 6 ssh $h DISPLAY=:0 xscreensaver-command -time); then + if [[ $lock_info != *non-blanked* ]]; then + locked=true + fi + else + locked=true + fi + if ! $locked; then # Ignore this host. i sometimes use a non-main machine for # testing or web browsing, knowing that everything will be wiped # by the next backup, but I dont want it to happen as Im using # it from cronjob. - e "warning: $h: active X session in the last 15 minutes, skipping for now" + e "warning: $h: seems to be actively in use, skipping for now" continue fi fi @@ -493,11 +579,14 @@ snapshot_create onchange # if one disk had less space. # for now, keeping them equal. 
snapshot_preserve $std_preserve -snapshot_preserve_min 2h +snapshot_preserve_min 6h snapshot_dir btrbk # so, total backups = ~58 target_preserve $std_preserve -target_preserve_min 2h +target_preserve_min 6h + +# i tried this when investigating: clone no source subvolume found error +#incremental_prefs sro:1 srn:1 sao san:1 aro:1 arn:1 # if something fails and it's not obvious, try doing # btrbk -l debug -v dryrun @@ -531,7 +620,7 @@ EOF mp_count=${#mountpoints[@]} for (( i=0; i < mp_count - 1 ; i++ )); do if [[ ${mountpoints[i]} == /q ]]; then - unset mountpoints[i] + unset "mountpoints[i]" mountpoints+=(/q) fi done @@ -615,17 +704,27 @@ if mountpoint $rsync_mountpoint >/dev/null; then done fi +subvols=() +for mp in "${mountpoints[@]}"; do + subvols+=("${mp##*/}") +done if [[ $source ]]; then - m mount-latest-subvol + m mount-latest-subvol "${subvols[@]}" else - m /a/exe/mount-latest-remote ${targets[@]} + for tg in ${targets[@]}; do + m /a/exe/mount-latest-remote "$tg" "${subvols[@]}" || ret=$? + done fi if [[ $ret == 0 ]]; then for tg in ${targets[@]}; do - : - #ssh root@$tg /a/exe/mail-backup-clean + h=$(ssh root@$tg hostname) + rsync -a /var/log/btrbk $tg:/var/log/btrbk/$tg + ssh root@$tg /usr/local/bin/mail-backup-clean done + if [[ $source ]]; then + rsync -a $source:/var/log/btrbk /var/log/btrbk/$source + fi fi mexit $ret