X-Git-Url: https://iankelling.org/git/?a=blobdiff_plain;f=btrbk-run;h=5388858b946c44640ed18fa25285acfbdcb296ac;hb=dc34d23c63cd83a7cc7a79525445aad3293c7241;hp=dcca4b7dd036dca6fb662f40e9904a824160e6bd;hpb=72c18f3a6a7f1ed0ca16af654a1f804ab96e1ff9;p=distro-setup diff --git a/btrbk-run b/btrbk-run index dcca4b7..5388858 100644 --- a/btrbk-run +++ b/btrbk-run @@ -89,8 +89,9 @@ targets=() early=false cron=false fast=false +kd_spread_maybe=false orig_args=("$@") -temp=$(getopt -l cron,fast,pull-reexec,help 23ceil:m:npqrs:t:vh "$@") || usage 1 +temp=$(getopt -l cron,fast,pull-reexec,help 23ceikl:m:npqrs:t:vh "$@") || usage 1 eval set -- "$temp" while true; do case $1 in @@ -113,6 +114,8 @@ while true; do # switch mail-host, no need to repeat the same checks again. --fast) fast=true ;; -i) incremental_strict=true ;; + # note this implies resume + -k) kd_spread_maybe=true ;; # bytes per second, suffix k m g -l) rate_limit=$2; shift ;; # Comma separated mountpoints to backup. This has defaults set below. @@ -145,7 +148,18 @@ while true; do shift done -cmd_arg=${1:-run} +cmd_arg="$1" + +if $kd_spread_maybe; then + if [[ $cmd_arg && $cmd_arg != resume ]]; then + die "dont pass -k without resume or empty run arg" + fi + cmd_arg=resume +fi + +if [[ ! $cmd_arg ]]; then + cmd_arg=run +fi std_preserve="36h 14d 8w 24m" @@ -180,9 +194,6 @@ if [[ -v targets && $source ]]; then die "error: -t and -s are mutually exclusive" fi -if $verbose; then - printf "$pre options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg" -fi ### end options parsing # remove path from earlier version of btrbk @@ -207,7 +218,7 @@ kd_spread=false if [[ ! -v targets && ! $source ]]; then if $cron; then if [[ $HOSTNAME != "$MAIL_HOST" ]]; then - if [[ $HOSTNAME == kd && $MAIL_HOST == x3 ]]; then + if $kd_spread_maybe && [[ $HOSTNAME == kd && $MAIL_HOST == x3 ]]; then if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then work_host=x3.office.fsf.org elif ping -q -c1 -w1 x3wg.b8.nz &>/dev/null; then @@ -219,6 +230,8 @@ if [[ ! -v targets && ! $source ]]; then if [[ $MAIL_HOST == x3 ]]; then kd_spread=true else + # x3 was the mail host, but it moved to some other machine + # without updating us yet. echo "MAIL_HOST=$MAIL_HOST, nothing to do" mexit 0 fi @@ -261,21 +274,26 @@ if [[ ! -v targets && ! $source ]]; then ;;& *) if $at_home; then - if ! $kd_spread; then + if ! $kd_spread && [[ $HOSTNAME != x3 ]]; then # main work machine if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then targets+=(x3.office.fsf.org) + elif ping -q -c1 -w1 $h.b8.nz &>/dev/null; then + # in case we took it home + targets+=(x3.b8.nz) else targets+=(x3wg.b8.nz) fi fi - for h in frodo kd; do + # temporarily disabled while doing recovery + # for h in frodo kd; do + for h in kd; do if [[ $HOSTNAME == "$h" ]]; then continue fi targets+=($h.b8.nz) done - for h in x2 x3 sy; do + for h in x2 sy; do if [[ $HOSTNAME == "$h" ]]; then continue fi @@ -286,11 +304,7 @@ if [[ ! -v targets && ! $source ]]; then fi done elif $at_work; then - if ping -q -c1 -w1 iank.vpn.office.fsf.org &>/dev/null; then - targets+=(iank.vpn.office.fsf.org) - else - targets+=(i.b8.nz) - fi + targets+=(i.b8.nz) for h in x2 x3 kw; do if [[ $HOSTNAME == "$h" ]]; then continue @@ -306,14 +320,6 @@ if [[ ! -v targets && ! $source ]]; then esac fi -if [[ -v targets ]]; then - echo "targets: ${targets[*]}" -fi - -if [[ $source ]]; then - echo "source: $source" -fi - if [[ ${mountpoints[0]} ]]; then for mp in ${mountpoints[@]}; do if [[ -e /nocow/btrfs-stale/$mp ]]; then @@ -345,6 +351,9 @@ else if [[ $HOSTNAME == "$HOST2" ]]; then prospective_mps+=(/a /ar /qr /q) fi + if $kd_spread; then + prospective_mps=(/a /ar /o /qr /q) + fi fi # note: put q last just in case its specific retention options were to # affect other config sections. I havent tested if that is the case. @@ -362,10 +371,15 @@ else done fi -echo "mountpoints: ${mountpoints[*]}" +if (( ! ${#mountpoints[@]} )); then + die didnt get mountpoint arg and had no defaults +fi ##### end command line parsing ######## +#### begin pre-checks ##### + +# todo: this has a timing problem, since btrbk.timer could activate the service after this check. if ! $fast && [[ $source ]]; then if [[ $(ssh $source ps --no-headers -o comm 1) == systemd ]]; then status=$(ssh $source systemctl is-active btrbk.service) || : # normally returns 3 @@ -378,6 +392,38 @@ if ! $fast && [[ $source ]]; then fi fi +if ! command -v btrbk &>/dev/null; then + die "error: no btrbk binary found" +fi + + +#### end pre-checks ##### + +mkdir -p /var/log/btrbk +# The journal doesnt go back to my oldest backups, and I've found myself +# wanting older logs. Not going to bother expiring old logs, since it is +# fine if they go back years. +log_path=/var/log/btrbk/$(date +%F_%T%:::z).log +echo copying output to $log_path +exec &> >(ts "%F %T" | tee -a $log_path) + + +if $verbose; then + printf "$pre options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg" +fi + +if [[ -v targets ]]; then + echo "targets: ${targets[*]}" +fi + +if [[ $source ]]; then + echo "source: $source" +fi + +echo "mountpoints: ${mountpoints[*]}" + + + # pull_reexec stops us from getting into an infinite loop if there is some # kind of weird problem pulla=false @@ -401,10 +447,6 @@ if ! $pull_reexec && [[ $source ]] && $pulla ; then fi -if ! command -v btrbk &>/dev/null; then - die "error: no btrbk binary found" -fi - if ! $fast; then # if our mountpoints are from stale snapshots, # it doesn't make sense to do a backup. @@ -434,23 +476,27 @@ else sshable=() sshfail=() - min_idle_ms=$((1000 * 60 * 15)) for h in ${targets[@]}; do if $fast || $conf_only; then # Use some typical values in this case root_size=$(( 1024 * 1024 * 2000 )) #2tb percent_used=10 zone=$(date +%z) - elif tmpstr=$(timeout -s 9 6 ssh root@$h "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1"); then - IFS=" " read -r -a remote_info <<<"$tmpstr" - - zone=${remote_info[0]} - root_size=${remote_info[1]} - percent_used=${remote_info[2]%%%} - - if (( ${#remote_info[@]} != 3 )); then - die "error: didnt get 3 fields in test ssh to target $h. investigate" + elif remote_str=$(timeout -s 9 6 ssh root@$h "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1"); then + mapfile -t tmp_array <<<"$remote_str" + zone="${tmp_array[0]}" + IFS=" " read -r root_size percent_used <<<"${tmp_array[1]}" + percent_used=${percent_used%%%} + + if (( ${#tmp_array[@]} != 2 )); then + die "error: didnt get 2 lines in test ssh to target $h. investigate" fi + case $percent_used in + [0-9]|[1-9][0-9]) : ;; + *) + die "error: didnt get percent disk use in test ssh to target $h. investigate" + ;; + esac else sshfail+=($h) continue @@ -466,15 +512,26 @@ else die "error: filesystem on target $h is $percent_used % full" fi - # This is a separate ssh because xprintidle can fail and thats ok. - if $cron && idle_ms=$(timeout -s 9 6 ssh $h DISPLAY=:0 xprintidle); then - if (( idle_ms < min_idle_ms )); then - + # on sy, xprintidle is resetting every 12 seconds even when not + # idle, i dunno why, instead we are checking if the screen is locked, + # which is good enough. + # + # This is a separate ssh because the command can fail and thatis ok. + if $cron; then + locked=false + if lock_info=$(timeout -s 9 6 ssh $h DISPLAY=:0 xscreensaver-command -time); then + if [[ $lock_info != *non-blanked* ]]; then + locked=true + fi + else + locked=true + fi + if ! $locked; then # Ignore this host. i sometimes use a non-main machine for # testing or web browsing, knowing that everything will be wiped # by the next backup, but I dont want it to happen as Im using # it from cronjob. - e "warning: $h: active X session in the last 15 minutes, skipping for now" + e "warning: $h: seems to be actively in use, skipping for now" continue fi fi @@ -522,11 +579,11 @@ snapshot_create onchange # if one disk had less space. # for now, keeping them equal. snapshot_preserve $std_preserve -snapshot_preserve_min 2h +snapshot_preserve_min 6h snapshot_dir btrbk # so, total backups = ~58 target_preserve $std_preserve -target_preserve_min 2h +target_preserve_min 6h # i tried this when investigating: clone no source subvolume found error #incremental_prefs sro:1 srn:1 sao san:1 aro:1 arn:1 @@ -654,14 +711,20 @@ done if [[ $source ]]; then m mount-latest-subvol "${subvols[@]}" else - m /a/exe/mount-latest-remote ${targets[@]} + for tg in ${targets[@]}; do + m /a/exe/mount-latest-remote "$tg" "${subvols[@]}" || ret=$? + done fi if [[ $ret == 0 ]]; then for tg in ${targets[@]}; do - : - #ssh root@$tg /a/exe/mail-backup-clean + h=$(ssh root@$tg hostname) + rsync -a /var/log/btrbk $tg:/var/log/btrbk/$tg + ssh root@$tg /usr/local/bin/mail-backup-clean done + if [[ $source ]]; then + rsync -a $source:/var/log/btrbk /var/log/btrbk/$source + fi fi mexit $ret