#!/bin/bash # Copyright (C) 2016 Ian Kelling # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # todo: if we cancel in the middle of a btrfs send, then run again # immediately, the received subvolume doesn't get a Received UUID: # field, and we won't mount it. Need to figure out a solution that will # fix this. [[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@" source /usr/local/lib/err usage() { cat <<'EOF' btrbk-run [OPTIONS] [run|resume|archive] usually -t TARGET_HOST or -s SOURCE_HOST Note, at source location, intentionally not executable, run and read install-my-scripts. EOF echo "top of script file:" sed -n '1,/^[# ]*end command line/{p;b};q' "$0" exit $1 } script_name="${BASH_SOURCE[0]}" script_name="${script_name##*/}" pre="${SSH_CLIENT:+$HOSTNAME} $script_name:" m() { if $verbose; then printf "$pre%s\n" "$*"; fi; "$@"; } e() { printf "$pre%s\n" "$*"; } die() { printf "$pre%s\n" "$*" >&2; echo "exiting with status 1" >&2; exit 1; } mexit() { echo "$pre: exiting with status $1"; exit $1; } # latest $MAIL_HOST if [[ -e /b/bash_unpublished/source-state ]]; then source /b/bash_unpublished/source-state fi # note q is owned by root:1000 mountpoints=() rsync_mountpoint=/q ret=0 # default options conf_only=false dry_run=false # mostly for testing rate_limit=no verbose=true; verbose_arg=-v if [[ $INVOCATION_ID ]]; then # INVOCATION_ID means running as a systemd service. we cant show progress in this case, # but if we pass the arg, it will insert mbuffer into the command. 
progress_arg=
else
  progress_arg="--progress"
fi
incremental_strict=false
pull_reexec=false

# Optional file of default options; its contents are word-split and
# prepended to the command line arguments.
default_args_file=/etc/btrbk-run.conf
if [[ -s $default_args_file ]]; then
  # shellcheck disable=SC2046 # we want word splitting
  set -- $(< $default_args_file) "$@"
  # i havent used this feature yet, so warn about it
  echo "$0: warning: default btrbk-run options set in $default_args_file (sleeping 5 seconds):"
  cat $default_args_file
  sleep 5
fi

targets=()
early=false
cron=false
fast=false
kd_spread_maybe=false

# Saved before getopt rewrites the positional parameters.
orig_args=("$@")

temp=$(getopt -l cron,fast,pull-reexec,help 23ceikl:m:npqrs:t:vh "$@") || usage 1
eval set -- "$temp"
while true; do
  case $1 in
    # some behaviors specific to running under cron:
    # - skip hosts where xprintidle haven't been idle recently
    # - if we can't ssh to 1 or more hosts, still do the rest
    # - if we aren't MAIL_HOST and no -m or -s, just exit
    --cron)
      cron=true
      # no message prefix under cron
      pre=
      ;;
    # for the rare case we want to run multiple instances at the same time
    # (conf_suf becomes part of the /etc/btrbk$conf_suf.conf file name)
    -2) conf_suf=2 ;;
    -3) conf_suf=3 ;;
    # only creates the config file, does not run btrbk
    -c) conf_only=true ;;
    # quit early, just btrbk, no extra remounting etc.
    -e) early=true ;;
    # skip various checks. when we run twice in a row for
    # switch mail-host, no need to repeat the same checks again.
    --fast) fast=true ;;
    -i) incremental_strict=true ;;
    # note this implies resume
    -k) kd_spread_maybe=true ;;
    # bytes per second, suffix k m g
    -l) rate_limit=$2; shift ;;
    # Comma separated mountpoints to backup. This has defaults set below.
    -m) IFS=, mountpoints=($2); unset IFS; shift ;;
    -n) dry_run=true ;;
    # hide progress
    -p) progress_arg= ;;
    # internal option for rerunning under newer SOURCE_HOST version.
    --pull-reexec) pull_reexec=true;;
    # quiet
    -q) verbose=false; verbose_arg=; progress_arg= ;;
    # source host to receive a backup from
    -s)
      source=$2
      bbksource=$source
      # a source containing a colon gets wrapped in brackets
      # NOTE(review): presumably for ipv6 literals - confirm
      if [[ $source == *:* ]]; then
        bbksource="[$source]"
      fi
      shift
      ;;
    # target hosts to send to. empty is valid for just doing local
    # snapshot.
# we have default hosts we will populate.
    -t) IFS=, targets=($2); unset IFS; shift ;;
    # verbose.
    -v) verbose=true; verbose_arg=-v ;;
    # explicit status so asking for help always exits 0
    -h|--help) usage 0 ;;
    --) shift; break ;;
    *) die "Internal error!" ;;
  esac
  shift
done

# The single optional non-option argument selects the btrbk command.
cmd_arg=${1:-run}
if $kd_spread_maybe; then
  if [[ $1 && $1 != resume ]]; then
    die "dont pass -k without resume or empty run arg"
  fi
  # -k implies resume. This must come after the default assignment
  # above, otherwise it gets overwritten with "run".
  cmd_arg=resume
fi

std_preserve="36h 14d 8w 24m"
q_preserve="18h 14d 8w"

case $cmd_arg in
  run|resume)
    :
    ;;
  # This works better than the normal archive command. We have to
  # specify the mount points, but that is what we are used to doing and
  # we prefer it. Another difference is that archive works recursively
  # and we don't care about that. Sometimes we may still want to run
  # btrbk archive, but it doesn't even use the config file, so just
  # run it directly, eg:
  # time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk
  archive)
    cmd_arg=resume
    std_preserve="999h 999d 999w 999m"
    q_preserve="$std_preserve"
    preserve_arg=-p
    ;;
  *)
    die "untested command arg"
    ;;
esac

if (( $# > 1 )); then
  die "only 1 nonoption arg is supported"
fi

if [[ -v targets && $source ]]; then
  # note, this doesnt need to be the case, but
  # we would need to think about it.
  die "error: -t and -s are mutually exclusive"
fi
### end options parsing

# remove path from earlier version of btrbk
rm -f /usr/sbin/btrbk

# note, this still works as intended if there is no /usr/bin/btrbk
if [[ /a/opt/btrbk/btrbk -nt /usr/bin/btrbk ]]; then
  if [[ -e /b/distro-functions/src/package-manager-abstractions ]]; then
    . /b/distro-functions/src/package-manager-abstractions
    pi asciidoctor
  fi
  cd /a/opt/btrbk
  m make install
fi

# TODO: i wonder if there should be an option to send to the default
# targets, plus any given on the command line.

kd_spread=false
# set default targets
if [[ ! -v targets && ! $source ]]; then
  if $cron; then
    # Under cron, only MAIL_HOST sends backups. The exception is kd with
    # -k, which continues if the work host confirms that MAIL_HOST is x3.
    if [[ $HOSTNAME != "$MAIL_HOST" ]]; then
      if $kd_spread_maybe && [[ $HOSTNAME == kd && $MAIL_HOST == x3 ]]; then
        if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then
          work_host=x3.office.fsf.org
        elif ping -q -c1 -w1 x3wg.b8.nz &>/dev/null; then
          work_host=x3wg.b8.nz
        fi
        if [[ $work_host ]]; then
          # refresh MAIL_HOST from the work host's copy of source-state
          source_state="$(ssh $work_host cat /a/bin/bash_unpublished/source-state)"
          eval "$source_state"
          if [[ $MAIL_HOST == x3 ]]; then
            kd_spread=true
          else
            echo "MAIL_HOST=$MAIL_HOST, nothing to do"
            mexit 0
          fi
        else
          echo "MAIL_HOST=$MAIL_HOST, nothing to do"
          mexit 0
        fi
      else
        echo "MAIL_HOST=$MAIL_HOST, nothing to do"
        mexit 0
      fi
    fi
  fi
  at_work=false
  at_home=false

  case $HOSTNAME in
    kw|kd|frodo|x2|x3|sy)
      :
      ;;
    *)
      die "error: no default targets for this host, use -t"
      ;;
  esac

  # ;;& keeps testing the following patterns, so the final *) arm always
  # runs after the location has been worked out.
  case $HOSTNAME in
    kw)
      at_work=true
      ;;&
    kd|frodo)
      at_home=true
      ;;&
    x2|x3|sy)
      if [[ $(dig +short @10.2.0.1 -x 10.2.0.2 2>&1 ||:) == kd.b8.nz. ]] \
           && ip n show 10.2.0.1 | grep . &>/dev/null; then
        at_home=true
      elif ping -q -c1 -w1 hal.office.fsf.org &>/dev/null \
             && ip n show 192.168.0.26 | grep . &>/dev/null; then
        at_work=true
      fi
      ;;&
    *)
      if $at_home; then
        if ! $kd_spread && [[ $HOSTNAME != x3 ]]; then
          # main work machine
          if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then
            targets+=(x3.office.fsf.org)
          # ping the host we are about to add; $h is not set yet at
          # this point
          elif ping -q -c1 -w1 x3.b8.nz &>/dev/null; then
            # in case we took it home
            targets+=(x3.b8.nz)
          else
            targets+=(x3wg.b8.nz)
          fi
        fi
        # temporarily disabled while doing recovery
        # for h in frodo kd; do
        for h in kd; do
          if [[ $HOSTNAME == "$h" ]]; then
            continue
          fi
          targets+=($h.b8.nz)
        done
        for h in x2 sy; do
          if [[ $HOSTNAME == "$h" ]]; then
            continue
          fi
          if ping -q -c1 -w1 $h.b8.nz &>/dev/null; then
            targets+=($h.b8.nz)
          elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then
            targets+=(${h}w.b8.nz)
          fi
        done
      elif $at_work; then
        targets+=(i.b8.nz)
        for h in x2 x3 kw; do
          if [[ $HOSTNAME == "$h" ]]; then
            continue
          fi
          if ping -q -c1 -w1 $h.office.fsf.org &>/dev/null; then
            targets+=($h.office.fsf.org)
          fi
        done
      else
        targets+=(i.b8.nz)
      fi
      ;;
  esac
fi

if $verbose; then
  # trailing newline so the next message starts on its own line
  printf "$pre options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s\n" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg"
fi

if [[ -v targets ]]; then
  echo "targets: ${targets[*]}"
fi
if [[ $source ]]; then
  echo "source: $source"
fi

if [[ ${mountpoints[0]} ]]; then
  for mp in ${mountpoints[@]}; do
    if [[ -e /nocow/btrfs-stale/$mp ]]; then
      die "error: $mp is stale, mount-latest-subvol first"
    fi
  done
else
  # set default mountpoints
  if [[ ${targets[0]} == tp ]]; then
    prospective_mps=(/a)
  else
    case $HOSTNAME in
      *)
        prospective_mps=()
        if [[ $source ]]; then
          # pulling: decide based on the roles of the source host
          source_state="$(ssh $source cat /a/bin/bash_unpublished/source-state)"
          eval "$source_state"
          source_host="$(ssh $source cat /etc/hostname)"
          if [[ $source_host == "$MAIL_HOST" ]]; then
            prospective_mps+=(/o)
          fi
          if [[ $source_host == "$HOST2" ]]; then
            prospective_mps+=(/a /ar /qr /q)
          fi
        else
          if [[ $HOSTNAME == "$MAIL_HOST" ]]; then
            prospective_mps+=(/o)
          fi
          if [[ $HOSTNAME == "$HOST2" ]]; then
            prospective_mps+=(/a /ar /qr /q)
          fi
          if $kd_spread; then
            prospective_mps=(/a /ar /o /qr /q)
          fi
        fi
        # note: put q last just in case its
specific retention options were to # affect other config sections. I havent tested if that is the case. ;; esac fi for mp in ${prospective_mps[@]}; do # default mountpoints to sync if [[ -e /nocow/btrfs-stale/$mp ]]; then e "warning: $mp stale, not adding to default mountpoints" continue fi if awk '{print $2}' /etc/fstab | grep -xF $mp &>/dev/null; then mountpoints+=($mp) fi done fi if (( ! ${#mountpoints[@]} )); then die didnt get mountpoint arg and had no defaults fi echo "mountpoints: ${mountpoints[*]}" ##### end command line parsing ######## # todo: this has a timing problem, since btrbk.timer could activate the service after this check. if ! $fast && [[ $source ]]; then if [[ $(ssh $source ps --no-headers -o comm 1) == systemd ]]; then status=$(ssh $source systemctl is-active btrbk.service) || : # normally returns 3 case $status in inactive|failed) : ;; *) echo "$0: error: cron btrbk is running on source. exiting out of caution" mexit 1 esac fi fi # pull_reexec stops us from getting into an infinite loop if there is some # kind of weird problem pulla=false for m in "${mountpoints[@]}"; do if [[ $m == /a ]]; then pulla=true break fi done if ! $pull_reexec && [[ $source ]] && $pulla ; then tmpf=$(mktemp) m rsync -ra $source:/usr/local/bin/{mount-latest-subvol,check-subvol-stale} /usr/local/bin m rsync -ra $source:/usr/local/lib/err /usr/local/lib m scp $source:/a/bin/distro-setup/btrbk-run $tmpf if ! diff -q $tmpf ${BASH_SOURCE[0]}; then e "found different version on host $source. reexecing" install -T $tmpf /usr/local/bin/btrbk-run m /usr/local/bin/btrbk-run --pull-reexec "${orig_args[@]}" mexit 0 fi fi if ! command -v btrbk &>/dev/null; then die "error: no btrbk binary found" fi if ! $fast; then # if our mountpoints are from stale snapshots, # it doesn't make sense to do a backup. m check-subvol-stale ${mountpoints[@]} || die "found stale mountpoints in ${mountpoints[*]}" # for an initial run, btrbk requires the dir to exist. 
mkdir -p /mnt/{root,o}/btrbk fi local_zone=$(date +%z) if [[ $source ]]; then if $fast; then zone=$local_zone else if ! zone=$(ssh root@$source date +%z); then if $conf_only; then echo "$0: warning: failed to ssh to root@$source" else die failed to ssh to root@$source fi fi if [[ $zone != "$local_zone" ]]; then die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost" fi fi else sshable=() sshfail=() for h in ${targets[@]}; do if $fast || $conf_only; then # Use some typical values in this case root_size=$(( 1024 * 1024 * 2000 )) #2tb percent_used=10 zone=$(date +%z) elif remote_str=$(timeout -s 9 6 ssh root@$h "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1"); then mapfile -t tmp_array <<<"$remote_str" zone="${tmp_array[0]}" IFS=" " read -r root_size percent_used <<<"${tmp_array[1]}" percent_used=${percent_used%%%} if (( ${#tmp_array[@]} != 2 )); then die "error: didnt get 2 lines in test ssh to target $h. investigate" fi case $percent_used in [0-9]|[1-9][0-9]) : ;; *) die "error: didnt get percent disk use in test ssh to target $h. investigate" ;; esac else sshfail+=($h) continue fi # we may be booted into a bootstrap fs or something min_root_kb=$(( 1024 * 1024 * 200 )) # 200 gb if (( root_size < min_root_kb )); then continue fi if (( percent_used >= 98 )); then die "error: filesystem on target $h is $percent_used % full" fi # on sy, xprintidle is resetting every 12 seconds even when not # idle, i dunno why, instead we are checking if the screen is locked, # which is good enough. # # This is a separate ssh because the command can fail and thatis ok. if $cron; then locked=false if lock_info=$(timeout -s 9 6 ssh $h DISPLAY=:0 xscreensaver-command -time); then if [[ $lock_info != *non-blanked* ]]; then locked=true fi else locked=true fi if ! $locked; then # Ignore this host. 
i sometimes use a non-main machine for # testing or web browsing, knowing that everything will be wiped # by the next backup, but I dont want it to happen as Im using # it from cronjob. e "warning: $h: seems to be actively in use, skipping for now" continue fi fi sshable+=($h) if [[ $zone != "$local_zone" ]]; then die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost" fi done if [[ ! ${sshable[*]} ]] || { ! $cron && [[ ${sshfail[*]} ]]; }; then die "failed to ssh to hosts: ${sshfail[*]}" else if [[ ${sshfail[*]} ]]; then ret=1 e "error: failed to ssh to ${sshfail[*]} but continuing with other hosts" fi targets=(${sshable[@]}) fi fi cat >/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf </dev/null; then for tg in ${targets[@]}; do case $tg in tp) dirs=(/p/c/machine_specific/tp) for x in /p/c/machine_specific/*.hosts; do if grep -qxF $tg $x; then dirs+=(${x%.hosts}) fi done m rsync -aSAXPH --specials --devices --delete --relative ${dirs[@]} root@$tg:/ ;; esac done fi subvols=() for mp in "${mountpoints[@]}"; do subvols+=("${mp##*/}") done if [[ $source ]]; then m mount-latest-subvol "${subvols[@]}" else for tg in ${targets[@]}; do m /a/exe/mount-latest-remote "$tg" "${subvols[@]}" || ret=$? done fi if [[ $ret == 0 ]]; then for tg in ${targets[@]}; do : ssh root@$tg /a/exe/mail-backup-clean done fi mexit $ret # todo: move variable data we don't care about backing up # to /nocow and symlink it. # background on btrbk timezones. with short/long, timestamps use local time. # for long, if your local time moves backwards, by moving timezones or # for an hour when daylight savings changes it, you will temporarily get # a more aggressive retention policy for the overlapping period, and # vice versa for the opposite timezone move. 
# The alternative is using
# long-iso, which puts timezone info into the timestamp, which means
# that instead of shifting time, you shift the start of day/week/month
# which is used for retention to your new local time, which means for
# example, if you moved forward by 8 hours, the daily/weekly/monthly
# retention will be 8 hours more aggressive since midnight is at a new
# time, unless you fake the timezone using the TZ env variable.
# However, in the short term, there will be no inconsistencies.
# I don't see any problem with shifting when the day starts for
# retention, so I'm using long-iso.

# note to create a long-iso timestamp: date +%Y%m%dT%H%M%S%z