-pre="${0##*/}: "
+script_name="${BASH_SOURCE[0]}"
+script_name="${script_name##*/}"
+pre="${SSH_CLIENT:+$HOSTNAME} $script_name:"
# Run a command, first echoing it (prefixed with $pre) when $verbose is true.
# Globals:   verbose (read; expanded as a command, so it must be `true` or
#            `false`), pre (read).
# Arguments: the command to run and its arguments.
# Returns:   the exit status of the command.
m() {
  if $verbose; then
    # $pre is data, not a format string: a literal % or backslash in the
    # hostname or script name must be printed as-is.
    printf '%s%s\n' "$pre" "$*"
  fi
  "$@"
}
# Print a message to stdout, prefixed with $pre.
# Globals:   pre (read).
# Arguments: words of the message; they are joined with spaces.
e() {
  # Keep $pre out of the format string so a % or backslash in it is literal.
  printf '%s%s\n' "$pre" "$*"
}
# Print a message (prefixed with $pre) to stderr, then exit with status 1.
# Globals:   pre (read).
# Arguments: words of the message; they are joined with spaces.
die() {
  # Keep $pre out of the format string so a % or backslash in it is literal.
  printf '%s%s\n' "$pre" "$*" >&2
  echo "exiting with status 1" >&2
  exit 1
}
dry_run=false # mostly for testing
rate_limit=no
verbose=true; verbose_arg=-v
-progress_arg="--progress"
+if [[ $INVOCATION_ID ]]; then
+ # INVOCATION_ID means we are running as a systemd service. We can't show progress in that case,
+ # and passing the arg anyway would still insert mbuffer into the command.
+ progress_arg=
+else
+ progress_arg="--progress"
+fi
incremental_strict=false
pull_reexec=false
default_args_file=/etc/btrbk-run.conf
if [[ -s $default_args_file ]]; then
+ # shellcheck disable=SC2046 # we want word splitting
set -- $(< $default_args_file) "$@"
# I haven't used this feature yet, so warn about it
echo "$0: warning: default btrbk-run options set in $default_args_file (sleeping 5 seconds):"
sleep 5
fi
+targets=()
early=false
cron=false
+fast=false
orig_args=("$@")
-temp=$(getopt -l cron,pull-reexec,help ceil:m:npqrs:t:vh "$@") || usage 1
+temp=$(getopt -l cron,fast,pull-reexec,help 23ceil:m:npqrs:t:vh "$@") || usage 1
eval set -- "$temp"
while true; do
case $1 in
--cron)
cron=true
pre=
- shift
;;
+ # for the rare case we want to run multiple instances at the same time
+ -2) conf_suf=2 ;;
+ -3) conf_suf=3 ;;
# only creates the config file, does not run btrbk
- -c) conf_only=true; shift ;;
+ -c) conf_only=true ;;
# quit early, just btrbk, no extra remounting etc.
- -e) early=true; shift ;;
- -i) incremental_strict=true; shift ;;
+ -e) early=true ;;
+ # skip various checks. when we run twice in a row for
+ # switch mail-host, no need to repeat the same checks again.
+ --fast) fast=true ;;
+ -i) incremental_strict=true ;;
# bytes per second, suffix k m g
- -l) rate_limit=$2; shift 2 ;;
+ -l) rate_limit=$2; shift ;;
# Comma separated mountpoints to backup. This has defaults set below.
- -m) IFS=, mountpoints=($2); unset IFS; shift 2 ;;
- -n) dry_run=true; dry_run_arg=-n; shift ;;
- # show progress
- -p) progress_arg="--progress"; shift ;;
+ -m) IFS=, mountpoints=($2); unset IFS; shift ;;
+ -n) dry_run=true ;;
+ # hide progress
+ -p) progress_arg= ;;
# internal option for rerunning under newer SOURCE_HOST version.
- --pull-reexec) pull_reexec=true; shift ;;
+ --pull-reexec) pull_reexec=true;;
# quiet
- -q) verbose=false; verbose_arg=; progress_arg=; shift ;;
+ -q) verbose=false; verbose_arg=; progress_arg= ;;
# source host to receive a backup from
-s)
source=$2
if [[ $source == *:* ]]; then
bbksource="[$source]"
fi
- shift 2
+ shift
;;
# target hosts to send to. empty is valid for just doing local
# snapshot. we have default hosts we will populate.
- -t) IFS=, targets=($2); unset IFS; shift 2 ;;
+ -t) IFS=, targets=($2); unset IFS; shift ;;
# verbose.
- -v) verbose=true; verbose_arg=-v; shift ;;
+ -v) verbose=true; verbose_arg=-v ;;
-h|--help) usage ;;
--) shift; break ;;
*) die "Internal error!" ;;
esac
+ shift
done
cmd_arg=${1:-run}
-std_preserve="18h 14d 8w 24m"
-q_preserve="18h 14d"
+std_preserve="36h 14d 8w 24m"
+q_preserve="18h 14d 8w"
case $cmd_arg in
run|resume) : ;;
# run it directly, eg:
# time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk
archive)
- cmd=resume
+ cmd_arg=resume
std_preserve="999h 999d 999w 999m"
q_preserve="$std_preserve"
preserve_arg=-p
fi
if $verbose; then
- printf "options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg"
+ printf "$pre options: conf_only=%s\ndry_run=%s\nrate_limit=%s\nverbose=%s\ncmd_arg=%s" "$conf_only" "$dry_run" "$rate_limit" "$verbose" "$cmd_arg"
fi
### end options parsing
# targets, plus any given on the command line.
-amy=false
+
+kd_spread=false
# set default targets
if [[ ! -v targets && ! $source ]]; then
- if [[ $HOSTNAME != "$MAIL_HOST" ]] && $cron ; then
- echo "MAIL_HOST=$MAIL_HOST, nothing to do"
- mexit 0
- else
- amy=true
+ if $cron; then
+ if [[ $HOSTNAME != "$MAIL_HOST" ]]; then
+ if [[ $HOSTNAME == kd && $MAIL_HOST == x3 ]]; then
+ if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then
+ work_host=x3.office.fsf.org
+ elif ping -q -c1 -w1 x3wg.b8.nz &>/dev/null; then
+ work_host=x3wg.b8.nz
+ fi
+ if [[ $work_host ]]; then
+ source_state="$(ssh $work_host cat /a/bin/bash_unpublished/source-state)"
+ eval "$source_state"
+ if [[ $MAIL_HOST == x3 ]]; then
+ kd_spread=true
+ else
+ echo "MAIL_HOST=$MAIL_HOST, nothing to do"
+ mexit 0
+ fi
+ else
+ echo "MAIL_HOST=$MAIL_HOST, nothing to do"
+ mexit 0
+ fi
+ else
+ echo "MAIL_HOST=$MAIL_HOST, nothing to do"
+ mexit 0
+ fi
+ fi
fi
at_work=false
+ at_home=false
+
+ case $HOSTNAME in
+ kw|kd|frodo|x2|x3|sy) : ;;
+ *)
+ die "error: no default targets for this host, use -t"
+ ;;
+ esac
- targets=(frodo.b8.nz)
case $HOSTNAME in
- x2|kw)
+ kw)
at_work=true
;;&
- x2|x3|sy|bo)
- if ping -q -c1 -w1 hal.office.fsf.org \
+ kd|frodo)
+ at_home=true
+ ;;&
+ x2|x3|sy)
+ if [[ $(dig +short @10.2.0.1 -x 10.2.0.2 2>&1 ||:) == kd.b8.nz. ]] \
+ && ip n show 10.2.0.1 | grep . &>/dev/null; then
+ at_home=true
+ elif ping -q -c1 -w1 hal.office.fsf.org &>/dev/null \
&& ip n show 192.168.0.26 | grep . &>/dev/null; then
at_work=true
fi
;;&
- kw|x2|x3|sy|bo)
- if $at_work; then
+ *)
+ if $at_home; then
+ if ! $kd_spread; then
+ # main work machine
+ if ping -q -c1 -w1 x3.office.fsf.org &>/dev/null; then
+ targets+=(x3.office.fsf.org)
+ else
+ targets+=(x3wg.b8.nz)
+ fi
+ fi
+ # temporarily disabled while doing recovery
+# for h in frodo kd; do
+ for h in kd; do
+ if [[ $HOSTNAME == "$h" ]]; then
+ continue
+ fi
+ targets+=($h.b8.nz)
+ done
+ for h in x2 x3 sy; do
+ if [[ $HOSTNAME == "$h" ]]; then
+ continue
+ fi
+ if ping -q -c1 -w1 $h.b8.nz &>/dev/null; then
+ targets+=($h.b8.nz)
+ elif ping -q -c1 -w1 ${h}w.b8.nz &>/dev/null; then
+ targets+=(${h}w.b8.nz)
+ fi
+ done
+ elif $at_work; then
if ping -q -c1 -w1 iank.vpn.office.fsf.org &>/dev/null; then
- home=iank.vpn.office.fsf.org
+ targets+=(iank.vpn.office.fsf.org)
else
- home=i.b8.nz
+ targets+=(i.b8.nz)
fi
+ for h in x2 x3 kw; do
+ if [[ $HOSTNAME == "$h" ]]; then
+ continue
+ fi
+ if ping -q -c1 -w1 $h.office.fsf.org &>/dev/null; then
+ targets+=($h.office.fsf.org)
+ fi
+ done
else
- home=b8.nz
- fi
- ;;&
- kw)
- targets+=($home x2.office.fsf.org)
- ;;
- x2|x3|sy|bo)
- targets+=($home)
- if $at_work; then
- targets+=(x2.office.fsf.org x2.b8.nz)
- else
- targets+=(x2wg.b8.nz)
- fi
- ;;
- kd)
- targets+=(x2wg.b8.nz x3.b8.nz)
- if ping -q -c1 -w1 sy.b8.nz &>/dev/null; then
- targets+=(sy.b8.nz)
- else
- targets+=(syw.b8.nz)
+ targets+=(i.b8.nz)
fi
;;
- frodo)
- # no targets
- targets=()
- ;;
- *)
- die "error: no default targets for this host, use -t"
- ;;
esac
fi
prospective_mps=(/a)
else
case $HOSTNAME in
- frodo)
- prospective_mps=(/i)
- ;;
*)
prospective_mps=()
if [[ $source ]]; then
if [[ $source_host == "$MAIL_HOST" ]]; then
prospective_mps+=(/o)
fi
+ if [[ $source_host == "$HOST2" ]]; then
+ prospective_mps+=(/a /ar /qr /q)
+ fi
else
if [[ $HOSTNAME == "$MAIL_HOST" ]]; then
- # HOST2 is really the mail host if it exists
- if [[ $HOST2 && $HOST2 != "$HOSTNAME" ]]; then
- echo "skipping /o because HOST2 is not us"
- else
- prospective_mps+=(/o)
- fi
+ prospective_mps+=(/o)
+ fi
+ if [[ $HOSTNAME == "$HOST2" ]]; then
+ prospective_mps+=(/a /ar /qr /q)
fi
fi
# note: put q last just in case its specific retention options were to
# affect other config sections. I haven't tested whether that is the case.
- prospective_mps+=(/a /ar /qr /q)
;;
esac
fi
##### end command line parsing ########
-if [[ $source ]]; then
+# todo: this has a timing problem, since btrbk.timer could activate the service after this check.
+if ! $fast && [[ $source ]]; then
if [[ $(ssh $source ps --no-headers -o comm 1) == systemd ]]; then
status=$(ssh $source systemctl is-active btrbk.service) || : # normally returns 3
case $status in
if ! command -v btrbk &>/dev/null; then
die "error: no btrbk binary found"
fi
-# if our mountpoints are from stale snapshots,
-# it doesn't make sense to do a backup.
-check-subvol-stale ${mountpoints[@]} || die "found stale mountpoints in ${mountpoints[*]}"
-# for an initial run, btrbk requires the dir to exist.
-mkdir -p /mnt/{root,o}/btrbk
+if ! $fast; then
+ # if our mountpoints are from stale snapshots,
+ # it doesn't make sense to do a backup.
+ m check-subvol-stale ${mountpoints[@]} || die "found stale mountpoints in ${mountpoints[*]}"
+
+ # for an initial run, btrbk requires the dir to exist.
+ mkdir -p /mnt/{root,o}/btrbk
+fi
local_zone=$(date +%z)
if [[ $source ]]; then
- if ! zone=$(ssh root@$source date +%z); then
- die failed to ssh to root@$source
- fi
- if [[ $zone != "$local_zone" ]]; then
- die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost"
+ if $fast; then
+   # --fast skips the remote probe; assume the source shares our time zone.
+   zone=$local_zone
+ elif ! zone=$(ssh "root@$source" date +%z); then
+   if $conf_only; then
+     # Only writing the config: an unreachable source is just a warning.
+     # There is no remote zone to compare in that case, so assume it
+     # matches instead of falling through and dying on an empty $zone.
+     echo "$0: warning: failed to ssh to root@$source"
+     zone=$local_zone
+   else
+     die "failed to ssh to root@$source"
+   fi
+ elif [[ $zone != "$local_zone" ]]; then
+   # The host being checked here is $source; $h is only set by the
+   # targets loop and would be empty or stale in this message.
+   die "error: dont confuse yourself with multiple time zones. $source has different timezone than localhost"
+ fi
-
else
sshable=()
sshfail=()
min_idle_ms=$((1000 * 60 * 15))
for h in ${targets[@]}; do
- if remote_info=( $(timeout -s 9 6 ssh root@$h "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1") ); then
- zone=${remote_info[0]}
- root_size=${remote_info[1]}
- percent_used=${remote_info[2]%%%}
-
- if (( ${#remote_info[@]} != 3 )); then
- die "error: didnt get 3 fields in test ssh to target $h. investigate"
+ if $fast || $conf_only; then
+   # Use some typical values in this case
+   root_size=$(( 1024 * 1024 * 2000 )) #2tb
+   percent_used=10
+   zone=$(date +%z)
+ elif remote_str=$(timeout -s 9 6 ssh "root@$h" "mkdir -p /mnt/root/btrbk /mnt/o/btrbk && date +%z && df --output=size,pcent / | tail -n1"); then
+   # Expect exactly two lines back: the zone offset, then "<size> <pcent>%".
+   # Check the shape before picking fields out of it.
+   mapfile -t tmp_array <<<"$remote_str"
+   if (( ${#tmp_array[@]} != 2 )); then
+     die "error: didnt get 2 lines in test ssh to target $h. investigate"
+   fi
+   zone="${tmp_array[0]}"
+   IFS=" " read -r root_size percent_used <<<"${tmp_array[1]}"
+   percent_used=${percent_used%%%}
+   case $percent_used in
+     # 100 is a valid value for a completely full filesystem; accept it so
+     # the fullness check that follows reports the real problem instead of
+     # a misleading parse error.
+     [0-9]|[1-9][0-9]|100) : ;;
+     *)
+       die "error: didnt get percent disk use in test ssh to target $h. investigate"
+       ;;
+   esac
+ else
+   sshfail+=("$h")
+   continue
+ fi
+ # we may be booted into a bootstrap fs or something
+ min_root_kb=$(( 1024 * 1024 * 200 )) # 200 gb
+ if (( root_size < min_root_kb )); then
+ continue
+ fi
- # we may be booted into a bootstrap fs or something
- min_root_kb=$(( 1024 * 1024 * 200 )) # 200 gb
- if (( root_size < min_root_kb )); then
- continue
- fi
-
- if (( percent_used >= 98 )); then
- die "error: filesystem on target $h is $percent_used % full"
- fi
+ if (( percent_used >= 98 )); then
+ die "error: filesystem on target $h is $percent_used % full"
+ fi
- # This is a separate ssh because xprintidle can fail and thats ok.
- if $cron && idle_ms=$(timeout -s 9 6 ssh $h DISPLAY=:0 xprintidle); then
- if (( idle_ms < min_idle_ms )); then
+ # This is a separate ssh because xprintidle can fail and thats ok.
+ if $cron && idle_ms=$(timeout -s 9 6 ssh $h DISPLAY=:0 xprintidle); then
+ if (( idle_ms < min_idle_ms )); then
- # Ignore this host. i sometimes use a non-main machine for
- # testing or web browsing, knowing that everything will be wiped
- # by the next backup, but I dont want it to happen as Im using
- # it from cronjob.
- e "warning: $h: active X session in the last 15 minutes, skipping for now"
- continue
- fi
- fi
- sshable+=($h)
- if [[ $zone != "$local_zone" ]]; then
- die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost"
+ # Ignore this host. I sometimes use a non-main machine for
+ # testing or web browsing, knowing that everything will be wiped
+ # by the next backup, but I don't want that to happen while I'm using
+ # it, when the backup runs from a cronjob.
+ e "warning: $h: active X session in the last 15 minutes, skipping for now"
+ continue
fi
- else
- sshfail+=($h)
+ fi
+ sshable+=($h)
+ if [[ $zone != "$local_zone" ]]; then
+ die "error: dont confuse yourself with multiple time zones. $h has different timezone than localhost"
fi
done
if [[ ! ${sshable[*]} ]] || { ! $cron && [[ ${sshfail[*]} ]]; }; then
fi
-cat >/etc/btrbk.conf <<EOF
+cat >/etc/btrbk$conf_suf.conf <<EOF
ssh_identity /q/root/h
#ssh_identity /root/.ssh/home
transaction_syslog local7
# trying this out
-stream_compress zstd
+#stream_compress zstd
# so we only run one at a time
-lockfile /var/lock/btrbk.lock
+lockfile /var/lock/btrbk$conf_suf.lock
# default format of short does not accomidate hourly preservation setting
timestamp_format long-iso
target_preserve $std_preserve
target_preserve_min 2h
+# i tried this when investigating: clone no source subvolume found error
+#incremental_prefs sro:1 srn:1 sao san:1 aro:1 arn:1
+
# if something fails and it's not obvious, try doing
# btrbk -l debug -v dryrun
EOF
if $incremental_strict; then
- cat >>/etc/btrbk.conf <<EOF
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
incremental strict
EOF
fi
+qconf() {
+ case $sub in
+ q)
+ # q has sensitive data I don't want to back up for so long: when $sub is q, append q-specific retention ($q_preserve) and snapshot_dir lines to the btrbk config; for any other $sub this function writes nothing.
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
+snapshot_preserve $q_preserve
+snapshot_preserve_min 2h
+snapshot_dir btrbk
+target_preserve $q_preserve
+target_preserve_min 2h
+EOF
+ ;;
+ esac
+
+}
+
+# make /q be last
+mp_count=${#mountpoints[@]}
+for (( i=0; i < mp_count - 1 ; i++ )); do
+ if [[ ${mountpoints[i]} == /q ]]; then
+ unset "mountpoints[i]"
+ mountpoints+=(/q)
+ fi
+done
+
for m in ${mountpoints[@]}; do
case $m in
/o)
sub=${m#/}
if [[ $source ]]; then
- cat >>/etc/btrbk.conf <<EOF
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
volume ssh://$bbksource$vol
subvolume $sub
+EOF
+ qconf
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
target send-receive $vol/btrbk
EOF
fi
if (( ${#targets[@]} )); then
- cat >>/etc/btrbk.conf <<EOF
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
volume $vol
subvolume $sub
EOF
- case $sub in
- q)
- # q has sensitive data i dont want to backup for so long
- cat >>/etc/btrbk.conf <<EOF
-snapshot_preserve $std_preserve
-snapshot_preserve_min 2h
-snapshot_dir btrbk
-target_preserve $std_preserve
-target_preserve_min 2h
-EOF
- ;;
- esac
+ qconf
for tg in ${targets[@]}; do
# handle ipv6
if [[ $tg == *:* ]]; then
tg="[$tg]"
fi
- cat >>/etc/btrbk.conf <<EOF
+ cat >>/etc/btrbk$conf_suf.conf <<EOF
target send-receive ssh://$tg$vol/btrbk
EOF
done
fi
done
-# if $amy; then
-# # to manually backup amy,
-# # bbk -e -s amy -m root_ubuntubionic
-# cat >>/etc/btrbk.conf <<'EOF'
-# volume ssh://amy/mnt/root
-# subvolume root_ubuntubionic
-# target send-receive /mnt/root/btrbk
-# EOF
-# fi
-
# todo: umount first to ensure we don't have any errors
# todo: do some kill fuser stuff to make umount more reliable
if $dry_run; then
- m btrbk -v -n $cmd_arg
+ m btrbk -c /etc/btrbk$conf_suf.conf -v -n $cmd_arg
mexit 0
fi
# -q and just using the syslog option seemed nice,
# but it doesn't show when a send has a parent and when it doesn't.
-m btrbk $preserve_arg $verbose_arg $progress_arg $cmd_arg
+m btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg
+
+if $early; then
+ exit 0
+fi
# todo: tp not valid anymore.
# if we have it, sync to systems which don't
done
fi
+subvols=()
+for mp in "${mountpoints[@]}"; do
+ subvols+=("${mp##*/}")
+done
if [[ $source ]]; then
- m mount-latest-subvol
+ m mount-latest-subvol "${subvols[@]}"
else
- m /a/exe/mount-latest-remote ${targets[@]}
+ for tg in ${targets[@]}; do
+ m /a/exe/mount-latest-remote "$tg" "${subvols[@]}" || ret=$?
+ done
fi
if [[ $ret == 0 ]]; then