#!/bin/bash
# Copyright (C) 2016 Ian Kelling

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Wrapper around btrbk: works out which hosts and mountpoints to use,
# sanity checks the remote hosts over ssh, runs btrbk, then remounts the
# latest subvolumes locally (pull mode, -s) or on the targets (push mode).

# todo: if we cancel in the middle of a btrfs send, then run again
# immediately, the received subvolume doesn't get a Received UUID:
# field, and we won't mount it. Need to figure out a solution that will
# fix this.

# Everything below needs root; re-exec through sudo keeping the environment.
[[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"

source /usr/local/lib/err

# Print the help text plus the top of this file (up to the
# "end command line" marker), then exit.
# Arguments: $1 - exit status, defaults to 0.
usage() {
  cat <<'EOF'
btrbk-run [OPTIONS] [run|resume|archive]
usually -t TARGET_HOST or -s SOURCE_HOST

Note, at source location, intentionally not executable, run and read
install-my-scripts.
EOF
  echo "top of script file:"
  sed -n '1,/^[# ]*end command line/{p;b};q' "$0"
  exit "${1:-0}"
}

# Message prefix. Emptied by --cron.
pre="${0##*/}: "

# Run a command, echoing it first when verbose.
m() {
  if $verbose; then
    printf '%s%s\n' "$pre" "$*"
  fi
  "$@"
}
# Print a prefixed message to stdout.
e() { printf '%s%s\n' "$pre" "$*"; }
# Print a prefixed message to stderr and exit 1.
die() {
  printf '%s%s\n' "$pre" "$*" >&2
  echo "exiting with status 1" >&2
  exit 1
}
# Announce the exit status, then exit with it.
# $pre already carries its own ": " separator (or is empty under --cron).
mexit() {
  echo "${pre}exiting with status $1"
  exit "$1"
}

# latest $MAIL_HOST
if [[ -e /b/bash_unpublished/source-state ]]; then
  source /b/bash_unpublished/source-state
fi

# note q is owned by root:1000

mountpoints=()

rsync_mountpoint=/q

ret=0

# default options
conf_only=false
dry_run=false # mostly for testing
rate_limit=no
verbose=true; verbose_arg=-v
progress_arg="--progress"
incremental_strict=false
pull_reexec=false

default_args_file=/etc/btrbk-run.conf
if [[ -s $default_args_file ]]; then
  # Word splitting of the file contents is intentional: each word
  # becomes one default argument.
  # shellcheck disable=SC2046
  set -- $(< "$default_args_file") "$@"
  # i havent used this feature yet, so warn about it
  echo "$0: warning: default btrbk-run options set in $default_args_file (sleeping 5 seconds):"
  cat "$default_args_file"
  sleep 5
fi

early=false
cron=false

# Saved so a re-exec under a newer version gets the same arguments.
orig_args=("$@")

# NOTE(review): the short option string accepts -r but no case arm
# handles it, so -r ends in "Internal error!".
temp=$(getopt -l cron,pull-reexec,help ceil:m:npqrs:t:vh "$@") || usage 1
eval set -- "$temp"
while true; do
  case $1 in
    # some behaviors specific to running under cron:
    # - skip hosts where xprintidle haven't been idle recently
    # - if we can't ssh to 1 or more hosts, still do the rest
    # - if we aren't MAIL_HOST and no -m or -s, just exit
    --cron)
      cron=true
      pre=
      shift
      ;;
    # only creates the config file, does not run btrbk
    -c) conf_only=true; shift ;;
    # quit early, just btrbk, no extra remounting etc.
    -e) early=true; shift ;;
    -i) incremental_strict=true; shift ;;
    # bytes per second, suffix k m g
    -l) rate_limit=$2; shift 2 ;;
    # Comma separated mountpoints to backup. This has defaults set below.
    -m) IFS=, mountpoints=($2); unset IFS; shift 2 ;;
    -n) dry_run=true; dry_run_arg=-n; shift ;;
    # show progress
    -p) progress_arg="--progress"; shift ;;
    # internal option for rerunning under newer SOURCE_HOST version.
    --pull-reexec) pull_reexec=true; shift ;;
    # quiet
    -q) verbose=false; verbose_arg=; progress_arg=; shift ;;
    # source host to receive a backup from
    -s)
      source=$2
      bbksource=$source
      # an address containing a colon gets wrapped in brackets
      if [[ $source == *:* ]]; then
        bbksource="[$source]"
      fi
      shift 2
      ;;
    # target hosts to send to. empty is valid for just doing local
    # snapshot. we have default hosts we will populate.
    -t) IFS=, targets=($2); unset IFS; shift 2 ;;
    # verbose.
    -v) verbose=true; verbose_arg=-v; shift ;;
    -h|--help) usage ;;
    --) shift; break ;;
    *) die "Internal error!" ;;
  esac
done

cmd_arg=${1:-run}

std_preserve="18h 14d 8w 24m"
q_preserve="18h 14d"

case $cmd_arg in
  run|resume)
    :
    ;;
  # This works better than the normal archive command. We have to
  # specify the mount points, but that is what we are used to doing and
  # we prefer it. Another difference is that archive works recursively
  # and we don't care about that. Sometimes we may still want to run
  # btrbk archive, but it doesn't even use the config file, so just
  # run it directly, eg:
  # time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk
  archive)
    cmd_arg=resume
    std_preserve="999h 999d 999w 999m"
    q_preserve="$std_preserve"
    preserve_arg=-p
    ;;
  *)
    die "untested command arg"
    ;;
esac

if (( $# > 1 )); then
  die "only 1 nonoption arg is supported"
fi

if [[ -v targets && $source ]]; then
  # note, this doesnt need to be the case, but
  # we would need to think about it.
  die "error: -t and -s are mutually exclusive"
fi

if $verbose; then
  printf "options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s\n" \
    "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg"
fi
### end options parsing

# remove path from earlier version of btrbk
rm -f /usr/sbin/btrbk

# note, this still works as intended if there is no /usr/bin/btrbk
if [[ /a/opt/btrbk/btrbk -nt /usr/bin/btrbk ]]; then
  if [[ -e /b/distro-functions/src/package-manager-abstractions ]]; then
    . /b/distro-functions/src/package-manager-abstractions
    pi asciidoctor
  fi
  cd /a/opt/btrbk || die "error: failed to cd to /a/opt/btrbk"
  m make install
fi

# TODO: i wonder if there should be an option to send to the default
# targets, plus any given on the command line.

amy=false
# set default targets
if [[ ! -v targets && ! $source ]]; then
  if [[ $HOSTNAME != "$MAIL_HOST" ]] && $cron; then
    echo "MAIL_HOST=$MAIL_HOST, nothing to do"
    mexit 0
  else
    amy=true
  fi
  at_work=false

  targets=(frodo.b8.nz)

  # Arms ending in ;;& fall through to test the following patterns too,
  # so one host can match several arms in sequence.
  case $HOSTNAME in
    x2|kw)
      at_work=true
      ;;&
    x2|x3|sy|bo)
      if ping -q -c1 -w1 hal.office.fsf.org \
          && ip n show 192.168.0.26 | grep . &>/dev/null; then
        at_work=true
      fi
      ;;&
    kw|x2|x3|sy|bo)
      if $at_work; then
        if ping -q -c1 -w1 iank.vpn.office.fsf.org &>/dev/null; then
          home=iank.vpn.office.fsf.org
        else
          home=i.b8.nz
        fi
      else
        home=b8.nz
      fi
      ;;&
    kw)
      targets+=("$home" x2.office.fsf.org)
      ;;
    x2|x3|sy|bo)
      targets+=("$home")
      if $at_work; then
        targets+=(x2.office.fsf.org x2.b8.nz)
      else
        targets+=(x2wg.b8.nz)
      fi
      ;;
    kd)
      targets+=(x2wg.b8.nz x3.b8.nz)
      if ping -q -c1 -w1 sy.b8.nz &>/dev/null; then
        targets+=(sy.b8.nz)
      else
        targets+=(syw.b8.nz)
      fi
      ;;
    frodo)
      # no targets
      targets=()
      ;;
    *)
      die "error: no default targets for this host, use -t"
      ;;
  esac
fi

if [[ -v targets ]]; then
  echo "targets: ${targets[*]}"
fi
if [[ $source ]]; then
  echo "source: $source"
fi

if [[ ${mountpoints[0]} ]]; then
  # Mountpoints given with -m: refuse any that are marked stale.
  for mp in "${mountpoints[@]}"; do
    if [[ -e /nocow/btrfs-stale/$mp ]]; then
      die "error: $mp is stale, mount-latest-subvol first"
    fi
  done
else
  # set default mountpoints
  if [[ ${targets[0]} == tp ]]; then
    prospective_mps=(/a)
  else
    case $HOSTNAME in
      frodo)
        prospective_mps=(/i)
        ;;
      *)
        prospective_mps=()
        if [[ $source ]]; then
          # NOTE(review): this evals shell text fetched from the source
          # host as root; only use -s with fully trusted hosts.
          source_state="$(ssh "$source" cat /a/bin/bash_unpublished/source-state)"
          eval "$source_state"
          source_host="$(ssh "$source" cat /etc/hostname)"
          if [[ $source_host == "$MAIL_HOST" ]]; then
            prospective_mps+=(/o)
          fi
        else
          if [[ $HOSTNAME == "$MAIL_HOST" ]]; then
            # HOST2 is really the mail host if it exists
            if [[ $HOST2 && $HOST2 != "$HOSTNAME" ]]; then
              echo "skipping /o because HOST2 is not us"
            else
              prospective_mps+=(/o)
            fi
          fi
        fi
        # note: put q last just in case its specific retention options were to
        # affect other config sections. I havent tested if that is the case.
        prospective_mps+=(/a /ar /qr /q)
        ;;
    esac
  fi
  for mp in "${prospective_mps[@]}"; do # default mountpoints to sync
    if [[ -e /nocow/btrfs-stale/$mp ]]; then
      e "warning: $mp stale, not adding to default mountpoints"
      continue
    fi
    # Only keep mountpoints that are listed in fstab. Output is discarded
    # rather than using grep -q so awk never gets a closed pipe.
    if awk '{print $2}' /etc/fstab | grep -xF -- "$mp" &>/dev/null; then
      mountpoints+=("$mp")
    fi
  done
fi

echo "mountpoints: ${mountpoints[*]}"

##### end command line parsing ########

if [[ $source ]]; then
  if [[ $(ssh "$source" ps --no-headers -o comm 1) == systemd ]]; then
    status=$(ssh "$source" systemctl is-active btrbk.service) || : # normally returns 3
    case $status in
      inactive|failed)
        :
        ;;
      *)
        echo "$0: error: cron btrbk is running on source. exiting out of caution"
        mexit 1
        ;;
    esac
  fi
fi

# pull_reexec stops us from getting into an infinite loop if there is some
# kind of weird problem
pulla=false
for mp in "${mountpoints[@]}"; do
  if [[ $mp == /a ]]; then
    pulla=true
    break
  fi
done

# When pulling /a, first sync the helper scripts from the source host and
# re-exec its copy of this script if it differs from ours.
if ! $pull_reexec && [[ $source ]] && $pulla; then
  tmpf=$(mktemp)
  m rsync -ra "$source":/usr/local/bin/{mount-latest-subvol,check-subvol-stale} /usr/local/bin
  m rsync -ra "$source":/usr/local/lib/err /usr/local/lib
  m scp "$source":/a/bin/distro-setup/btrbk-run "$tmpf"
  if ! diff -q "$tmpf" "${BASH_SOURCE[0]}"; then
    e "found different version on host $source. reexecing"
    install -T "$tmpf" /usr/local/bin/btrbk-run
    rm -f "$tmpf"
    m /usr/local/bin/btrbk-run --pull-reexec "${orig_args[@]}"
    mexit 0
  fi
  rm -f "$tmpf"
fi

if ! command -v btrbk &>/dev/null; then
  die "error: no btrbk binary found"
fi
# if our mountpoints are from stale snapshots,
# it doesn't make sense to do a backup.
check-subvol-stale "${mountpoints[@]}" || die "found stale mountpoints in ${mountpoints[*]}"

# for an initial run, btrbk requires the dir to exist.
mkdir -p /mnt/{root,o}/btrbk

local_zone=$(date +%z)

if [[ $source ]]; then
  if ! zone=$(ssh "root@$source" date +%z); then
    if $conf_only; then
      echo "$0: warning: failed to ssh to root@$source"
      # Same fallback as the target branch below: assume the local zone
      # so that writing the config can continue after the warning.
      zone=$local_zone
    else
      die "failed to ssh to root@$source"
    fi
  fi
  if [[ $zone != "$local_zone" ]]; then
    die "error: dont confuse yourself with multiple time zones. $source has different timezone than localhost"
  fi
else
  sshable=()
  sshfail=()
  min_idle_ms=$((1000 * 60 * 15))
  for h in "${targets[@]}"; do
    # One probe returns three words: time zone, root fs size, percent used.
    # Splitting the output into words is intentional.
    # shellcheck disable=SC2207
    if remote_info=( $(timeout -s 9 6 ssh "root@$h" "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1") ); then
      if (( ${#remote_info[@]} != 3 )); then
        die "error: didnt get 3 fields in test ssh to target $h. investigate"
      fi
      zone=${remote_info[0]}
      root_size=${remote_info[1]}
      percent_used=${remote_info[2]%%%} # strip the trailing % sign
    elif $conf_only; then
      # Use some typical values in this case
      root_size=$(( 1024 * 1024 * 2000 )) #2tb
      percent_used=10
      zone=$(date +%z)
    else
      sshfail+=("$h")
      continue
    fi
    # we may be booted into a bootstrap fs or something
    min_root_kb=$(( 1024 * 1024 * 200 )) # 200 gb
    if (( root_size < min_root_kb )); then
      continue
    fi

    if (( percent_used >= 98 )); then
      die "error: filesystem on target $h is $percent_used % full"
    fi

    # This is a separate ssh because xprintidle can fail and thats ok.
    if $cron && idle_ms=$(timeout -s 9 6 ssh "$h" DISPLAY=:0 xprintidle); then
      if (( idle_ms < min_idle_ms )); then
        # Ignore this host. i sometimes use a non-main machine for
        # testing or web browsing, knowing that everything will be wiped
        # by the next backup, but I dont want it to happen as Im using
        # it from cronjob.
        e "warning: $h: active X session in the last 15 minutes, skipping for now"
        continue
      fi
    fi
    sshable+=("$h")
    if [[ $zone != "$local_zone" ]]; then
      die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost"
    fi
  done
  # Outside of cron any ssh failure is fatal; under cron we carry on with
  # the reachable hosts as long as there is at least one.
  if [[ ! ${sshable[*]} ]] || { ! $cron && [[ ${sshfail[*]} ]]; }; then
    die "failed to ssh to hosts: ${sshfail[*]}"
  else
    if [[ ${sshfail[*]} ]]; then
      ret=1
      e "error: failed to ssh to ${sshfail[*]} but continuing with other hosts"
    fi
    targets=("${sshable[@]}")
  fi
fi

# NOTE(review): the statements that generate /etc/btrbk.conf belong here.
# In the copy this file was restored from, the here-document bodies had
# been destroyed and only the residue quoted below survived. They must be
# restored from a good copy of the script; they cannot be rebuilt from
# what is left. Variables set above and not used anywhere else in this
# file (rate_limit, incremental_strict, bbksource, std_preserve,
# q_preserve, amy, early, dry_run_arg) are presumably consumed there.
#
# Surviving residue, verbatim:
#   cat >/etc/btrbk.conf <>/etc/btrbk.conf <>/etc/btrbk.conf <>/etc/btrbk.conf <>/etc/btrbk.conf <>/etc/btrbk.conf <>/etc/btrbk.conf <<'EOF'
#   # volume ssh://amy/mnt/root
#   # subvolume root_ubuntubionic
#   # target send-receive /mnt/root/btrbk
#   # EOF
#   # fi
#
# Stop here rather than run btrbk against whatever stale config is on disk.
die "error: /etc/btrbk.conf generation is missing from this copy of btrbk-run, restore it before running"

# todo: umount first to ensure we don't have any errors
# todo: do some kill fuser stuff to make umount more reliable

if $conf_only; then
  mexit 0
fi

if $dry_run; then
  m btrbk -v -n "$cmd_arg"
  mexit 0
fi

# -q and just using the syslog option seemed nice,
# but it doesn't show when a send has a parent and when it doesn't.
# The *_arg variables are left unquoted on purpose: each may be empty and
# must then vanish from the command line.
# shellcheck disable=SC2086
m btrbk $preserve_arg $verbose_arg $progress_arg "$cmd_arg"

# todo: tp not valid anymore.
# if we have it, sync to systems which don't
if mountpoint "$rsync_mountpoint" >/dev/null; then
  for tg in "${targets[@]}"; do
    case $tg in
      tp)
        dirs=(/p/c/machine_specific/tp)
        for x in /p/c/machine_specific/*.hosts; do
          if grep -qxF -- "$tg" "$x"; then
            dirs+=("${x%.hosts}")
          fi
        done
        m rsync -aSAXPH --specials --devices --delete --relative "${dirs[@]}" "root@$tg:/"
        ;;
    esac
  done
fi

if [[ $source ]]; then
  m mount-latest-subvol
else
  m /a/exe/mount-latest-remote "${targets[@]}"
fi

if [[ $ret == 0 ]]; then
  for tg in "${targets[@]}"; do
    : #ssh root@$tg /a/exe/mail-backup-clean
  done
fi

mexit $ret

# todo: move variable data we don't care about backing up
# to /nocow and symlink it.

# background on btrbk timezones. with short/long, timestamps use local time.
# for long, if your local time moves backwards, by moving timezones or
# for an hour when daylight savings changes it, you will temporarily get
# a more aggressive retention policy for the overlapping period, and
# vice versa for the opposite timezone move. The alternative is using
# long-iso, which puts timezone info into the timestamp, which means
# that instead of shifting time, you shift the start of day/week/month
# which is used for retention to your new local time, which means for
# example, if you moved forward by 8 hours, the daily/weekly/monthly
# retention will be 8 hours more aggressive since midnight is at a new
# time, unless you fake the timezone using the TZ env variable.
# However, in the short term, there will be no inconsistencies.
# I don't see any problem with shifting when the day starts for
# retention, so I'm using long-iso.

# note to create a long-iso timestamp: date +%Y%m%dT%H%M%S%z