From 957fb54ecff2dbf0d1301873f131b3b4b3406220 Mon Sep 17 00:00:00 2001 From: Ian Kelling Date: Wed, 2 Oct 2024 23:35:19 -0400 Subject: [PATCH] big refactor to make code understandable, probably fixes bugs --- filesystem/usr/local/bin/mount-latest-subvol | 726 +++++++++++-------- filesystem/usr/local/bin/umount-funcs | 63 +- 2 files changed, 451 insertions(+), 338 deletions(-) diff --git a/filesystem/usr/local/bin/mount-latest-subvol b/filesystem/usr/local/bin/mount-latest-subvol index f74e42c..201d4dc 100755 --- a/filesystem/usr/local/bin/mount-latest-subvol +++ b/filesystem/usr/local/bin/mount-latest-subvol @@ -20,13 +20,13 @@ this_file="$(readlink -f -- "${BASH_SOURCE[0]}")" readonly this_file this_dir="${this_file%/*}" - cd / [[ $EUID == 0 ]] || exec sudo -E "$this_file" "$@" - set -e; . /usr/local/lib/bash-bear; set +e -shopt -s nullglob + +shopt -s nullglob +# shellcheck source=/a/bin/ds/filesystem/usr/local/bin/umount-funcs source "$this_dir/umount-funcs" usage() { @@ -47,24 +47,30 @@ EOF exit $1 } -pre="mount-latest-subvol:${SSH_CLIENT:+ $HOSTNAME:}" +readonly pre="mount-latest-subvol:${SSH_CLIENT:+ $HOSTNAME:}" log_path=/var/log/btrbk/$(date +%F_%H_%M_%S%:::z).log +readonly log_path +# tee unique lines. tu() { + local file line while read -r line; do file="$1" grep -xFq "$line" "$file" || tee -a "$file"<<<"$line" done } +# debug: echo when $verbose d() { if $verbose; then printf "$pre %s\n" "$*" fi } +# btrfs log command b() { printf "$pre running: %s\n" "$*" |& pee cat 'ts "%F %T" >>'$log_path "$@" |& pee cat 'ts "%F %T" >>'$log_path } +# if $verbose, print command before executing. m() { if $verbose; then printf "$pre %s\n" "$*" @@ -72,25 +78,32 @@ m() { "$@" } +# mkdir + mount from fstab. mnt() { - dir=$1 - if ! mountpoint -q $dir; then - mkdir -p $dir - m mount $dir - fi + local dir + for dir; do + # note: if we had umount-kill keep track of failed umounts, we would + # not need to check if it is a mountpoint. + if ! mountpoint -q $dir; then + mkdir -p $dir + m mount $dir + fi + done } +# write piped lines to fstab, wiping out any existing lines that have +# the same first 2 fields. fstab() { - while read -r start mpoint end; do - l="$start $mpoint $end" + local mount_source mpoint line_end l + while read -r mount_source mpoint line_end; do + l="$mount_source $mpoint $line_end" # kill off any lines that duplicate the mount point. sed --follow-symlinks -ri "\%$l%b;\%^\s*\S+\s+$mpoint\s%d" /etc/fstab tu /etc/fstab <<<"$l" done } - -# duplicated in check-subvol -# Reassign $1 var from /dev/dm- to corresponding /dev/mapper/ +# Note: duplicated in check-subvol-stale. +# Change the variable $1 references from /dev/dm- to corresponding /dev/mapper/ mapper-dev() { local mapdev local -n devref=$1 @@ -105,148 +118,130 @@ mapper-dev() { } -##### begin command line parsing ######## - -# you can remove this if you do not have options which can have args with spaces or empty. - -verbose=true -force=false -temp=$(getopt -l help,force,verbose hfv "$@") || usage 1 -eval set -- "$temp" -while true; do - case $1 in - -f|--force) force=true ;; - -v|--verbose) verbose=true ;; - -h|--help) usage ;; - --) shift; break ;; - *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;; - esac - shift -done - -if (( $# )); then - all_vols=( "$@" ) -else - all_vols=(q a o i qd qr) - ar_snaps=(/mnt/root/btrbk/ar.*) - if [[ -e /mnt/root/ar ]] || (( ${#ar_snaps[@]} > 0 )); then - all_vols+=(ar) +# Input vars: $d, eg: /a +# +# Output vars: $root_dir, eg: /mnt/root +# +# Note: code-organizational only function. +get-btrfs-root-mountpoint() { + local dev devx + ### this is duplicated in check-subvol-stale + dev=$(sed -rn "s,^\s*([^#]\S*)\s+$d\s.*,\1,p" /etc/fstab /etc/mtab|head -n1) + # example dev: /dev/mapper/crypt-vgDISK_BY_ID_NAME-root + d dev=$dev + # $devx are the devices of the btrfs filesystem. they include $dev. + # + # Note, we can't just use $d in this sed because it might not be mounted. We do this loop + # because the device in fstab for the rootfs mountpoint can be different than $d. + for devx in $(btrfs fil show $dev| sed -rn 's#.*path (\S+)$#\1#p'); do + if [[ $devx == dm-* ]]; then + devx=/dev/$devx + mapper-dev devx + fi + d devx=$devx + root_dir=$(sed -rn "s,^\s*$devx\s+(\S+).*\bsubvolid=[05]\b.*,\1,p" /etc/mtab /etc/fstab|head -n1) + if [[ $root_dir ]]; then + d root_dir=$root_dir + break + fi + done + if [[ ! $root_dir ]]; then + echo "$0: error could not find root subvol mount for $dev" >&2 + exit 1 fi -fi - -##### end command line parsing ######## - -ret=0 -mkdir -p /var/log/btrbk - -##### begin setup fstab for subvols we care about ###### +} -if [[ -e /mnt/root/root2-crypttab ]]; then - tu /etc/crypttab leaf_time || ( leaf_new_limit_time > leaf_time && count > 30 ) )); then + b btrfs sub del $leaf fi - done < <(cat /mnt/root/root2-crypttab) -fi -if [[ -e /mnt/root/root2-fstab ]]; then - tu /etc/fstab /dev/null; then - crypt_dev=$root_dev -else # if we are in a recovery boot, find the next best crypt device - mopts=,noauto - # todo: I think I had an idea to not setup /o in this case, - # but never finished implementing it - for dev in $(dmsetup ls --target crypt | awk '{print $1}'); do - dev=/dev/mapper/$dev - if awk '{print $1}' /etc/mtab | grep -Fx $dev &>/dev/null; then - crypt_dev=$dev +# Check if subvol $1 is different than its btrbk parent subvol, delete it if not. +# +# Silently give up if $1 is not a child of a btrbk subvol. +# +# Input vars: $root_dir (from get-btrfs-root-mountpoint) +dedupe-btrbk() { + local parentid subvol parent_sub i lines + local -a bsubs + subvol="$1" + parentid=$(btrfs sub show $subvol | awk '$1 == "Parent" && $2 == "UUID:" {print $3}') + bsubs=($root_dir/btrbk/$vol.*) + # go in reverse order as its more likely to be at the end + for ((i=${#bsubs[@]}-1; i>=0; i--)); do + if [[ $parentid == $(btrfs sub show ${bsubs[i]} | awk '$1 == "UUID:" {print $2}') ]]; then + parent_sub=${bsubs[i]} break fi done -fi - - - -# dont tax the cpus of old laptops -if (( $(nproc) > 2)); then - mopts+=,compress=zstd -fi - -fstab </dev/null; then - continue +# Maybe add /mnt/{r,b}oot2 to fstab and mount them. +# Input vars: none +setup-root2() { + local mapper_dev + if [[ -e /mnt/root/root2-crypttab ]]; then + tu /etc/crypttab /dev/null; then + crypt_dev=$root_dev + else # if we are in a recovery boot, find the next best crypt device + mopts=,noauto + # todo: I think I had an idea to not setup /o in this case, + # but never finished implementing it + for dev in $(dmsetup ls --target crypt | awk '{print $1}'); do + dev=/dev/mapper/$dev + if awk '{print $1}' /etc/mtab | grep -Fx $dev &>/dev/null; then + crypt_dev=$dev break fi done - if $found_loop; then - break - fi - done - if $found_loop; then - continue fi - ##### end end checking loopback mounts ##### - pruned_vols+=("$vol") -done -### end pruning volumes ### + # dont tax the cpus of old laptops + if (( $(nproc) > 2)); then + mopts+=,compress=zstd + fi -umount_dirs=() -declare -A umount_vols + fstab </dev/null; then + continue + fi + + ##### begin skip $vol if it has a loopback mount within it ##### + found_loop=false + for l in $(losetup -ln|awk '{print $6}'); do + for dir in $d ${binds[@]}; do + if [[ $l == $dir* ]]; then + echo "$0: found loopback mount $l. giving up on unmounting $dir" + ret=1 + found_loop=true + break + fi + done + if $found_loop; then + break fi done - umount_dirs+=($d) - umount_vols[$vol]=t - else - mnt $d - did=$(stat -c%d $d) + if $found_loop; then + continue + fi + ##### end ##### + + pruned_vols+=("$vol") + done +} + + +# Find and fix the case where a bind mount's original source directory +# got remounted with a different filesystem. +# +# Input vars: ${pruned_vols[@]} +bind-mismatch-fix() { + local vol d_id b b_id found_mismatch + local -a to_umount + for vol in ${pruned_vols[@]}; do + set-d-binds + found_mismatch=false + d_id=$(stat -c%d $d) for b in ${binds[@]}; do if mountpoint -q $b; then - bid=$(stat -c%d $b) - if [[ $did != "$bid" ]]; then - umount_dirs+=($b) - umount_vols[$vol]=t + b_id=$(stat -c%d $b) + if [[ $d_id != "$b_id" ]]; then + found_mismatch=false + to_umount+=($b) fi - else - mnt $b fi done - fi -done - -if (( ${#umount_dirs[@]} >= 1 )); then - umount_ret=true - unmounted=() - umount-kill ${umount_dirs[@]} - - if $umount_ret; then - for vol in ${!umount_vols[@]}; do - vol-setup - fresh_snap=$(cat /nocow/btrfs-stale/$vol 2>/dev/null) - - #### begin dealing with leaf vols #### - - ### begin getting root_dir - ### this is duplicated in check-subvol-stale + if $found_mismatch; then + d "WARNING: fixing bind mount(s): ${to_umount[*]}, pointing to old subvol." + umount-kill ${to_umount[@]} + m mnt ${to_umount[@]} + fi + done +} - dev=$(sed -rn "s,^\s*([^#]\S*)\s+$d\s.*,\1,p" /etc/fstab /etc/mtab|head -n1) - d dev=$dev - # note, we need $dev because $d might not be mounted, and we do this loop - # because the device in fstab for the rootfs can be different. - for devx in $(btrfs fil show $dev| sed -rn 's#.*path (\S+)$#\1#p'); do - if [[ $devx == dm-* ]]; then - devx=/dev/$devx - mapper-dev devx - fi - d devx=$devx - root_dir=$(sed -rn "s,^\s*$devx\s+(\S+).*\bsubvolid=[05]\b.*,\1,p" /etc/mtab /etc/fstab|head -n1) - if [[ $root_dir ]]; then - d root_dir=$root_dir - break +# Check for stale vols, store their mountpoints. Make sure others are mounted. +# +# Input vars: ${pruned_vols[@]} +# Output vars: +# ${to_umount[@]}: dirs to unmount +# ${umount_vols[@]}: the underlying vols of to_umount. +plan-umounts() { + local vol dir + # directories we want to umount + to_umount=() + # volumes and/or their bind mounts that we want to be umounted (and + # may already be that way). + umount_vols=() + + for vol in ${pruned_vols[@]}; do + set-d-binds + + # If latest is already mounted, make sure binds are mounted and move on. + m check-subvol-stale $d + if [[ -s /nocow/btrfs-stale/$vol ]]; then + umount_vols+=($vol) + for dir in ${binds[@]} $d; do + if mountpoint -q $dir; then + to_umount=($dir) fi done - if [[ ! $root_dir ]]; then - echo "$0: error could not find root subvol mount for $dev" >&2 - exit 1 - fi - ### end getting root_dir + umount-dirs-add ${binds[@]} $d + else + mnt $d ${binds[@]} + fi + done +} - cd $root_dir - if [[ -e $vol ]]; then - if [[ $vol == qd ]]; then - b btrfs sub del qd - else - leaf=$vol.leaf.$(date +%Y-%m-%dT%H:%M:%S%z) - b mv $vol $leaf - b btrfs property set -ts $leaf ro true - - ### begin check if leaf is different, delete it if not ### - parentid=$(btrfs sub show $leaf | awk '$1 == "Parent" && $2 == "UUID:" {print $3}') - bsubs=(btrbk/$vol.*) - bsub= # base subvolume - # go in reverse order as its more likely to be at the end - for ((i=${#bsubs[@]}-1; i>=0; i--)); do - if [[ $parentid == $(btrfs sub show ${bsubs[i]} | awk '$1 == "UUID:" {print $2}') ]]; then - bsub=${bsubs[i]} - break - fi - done - if [[ $bsub ]]; then - # in testing, same subvol is 136 bytes. allow some overhead. 32 happens sometimes under systemd. - # $ errno 32 - # EPIPE 32 Broken pipe - lines=$(btrfs send --no-data -p $bsub $leaf | btrfs receive --dump | head -n 100 | wc -l || [[ $? == 141 || ${PIPESTATUS[0]} == 32 ]]) - if [[ $lines == 0 ]]; then - # example output of no differences: - # snapshot ./qrtest uuid=c41ff6b7-0527-f34d-95ac-190eecf54ff5 transid=2239 parent_uuid=64949e1b-4a3e-3945-9a8e-cd7b7c15d7d6 parent_transid=2239 - echo suspected identical: $bsub $leaf - b btrfs sub del $leaf - fi - fi - ### end check if leaf is different, delete it if not ### - - ## begin expire leaf vols ## - leaf_vols=($vol.leaf.*) - count=${#leaf_vols[@]} - leaf_limit_time=$(( EPOCHSECONDS - 60*60*24*60 )) # 60 days - leaf_new_limit_time=$(( EPOCHSECONDS - 60*60*24 * 5 )) # 5 days this - # goes backwards from oldest. leaf_new_limit_time is a safety - # measure to ensure we don't delete very recent leafs. - for leaf in ${leaf_vols[@]}; do - leaf_time=$(date -d ${leaf#"$vol".leaf.} +%s) - if (( leaf_limit_time > leaf_time || ( leaf_new_limit_time > leaf_time && count > 30 ) )); then - b btrfs sub del $leaf - fi - count=$((count-1)) - done - fi - ## end expire leaf vols ## +# This is like mount-latest-subvol, but for some subvols that have no +# mountpoint of their own. +# +# Input vars: none +amy-root2-latest() { + local dir vol root_dir fresh_snap + local -a kill_dirs + for dir in /mnt/r7/amy/{root/root,boot/boot}_ubuntubionic /mnt/{root2/root,boot2/boot}_ubuntubionic; do + vol=${dir##*/} # eg: boot_ubuntubionic + root_dir=${dir%/*} # eg: /mnt/root2 + if [[ ! -d $root_dir ]]; then + # currently, 2 dirs exist on one host, 2 on another. + continue + fi + # if latest is already mounted, make sure binds are mounted and move on + m check-subvol-stale -p $dir + # populated by check-subvol-stale if stale + if ! fresh_snap=$(cat /nocow/btrfs-stale/$vol 2>/dev/null); then + continue + fi + if [[ -d $dir ]]; then + kill_dirs=($dir) + if ! kill-dir TERM TERM TERM INT INT HUP HUP TERM TERM TERM INT INT HUP HUP; then + if $force; then kill-dir KILL; fi + fi + if ! b btrfs sub del $dir; then + echo "$0: ERROR: failed to delete subvolume $dir" >&2 + ret=1 fi - #### end dealing with leaf vols #### + fi + + b btrfs sub snapshot $fresh_snap $dir + rm -f /nocow/btrfs-stale/$vol + done +} - # Note, we make a few assumptions in this script, like - # $d was not a different subvol id than $vol, and - # things otherwise didn't get mounted very strangely. + +# Input vars: ${to_umount[@]} ${umount_vols[@]} +# +# note: if we ever did binds of binds, the ordering of to_umount +# would need some reversing. +umount-then-mount-latest() { + local vol fresh_snap + umount-kill ${to_umount[@]} + for vol in ${umount_vols[@]}; do + set-d-binds + if $umount_ret; then + fresh_snap=$(cat /nocow/btrfs-stale/$vol 2>/dev/null) + get-btrfs-root-mountpoint + cd $root_dir + mv-vol-to-leaf + expire-leaf-vols b btrfs sub snapshot $fresh_snap $vol - for dir in $d ${binds[@]}; do - m mnt $dir - done rm -f /nocow/btrfs-stale/$vol - done - else - # If we unmounted some but not all, restore them. - for dir in ${unmounted[@]}; do - mnt $dir - done + fi + m mnt $d ${binds[@]} + done +} + + +##### begin command line parsing ######## + +# you can remove this if you do not have options which can have args with spaces or empty. + +verbose=true +force=false +temp=$(getopt -l help,force,verbose hfv "$@") || usage 1 +eval set -- "$temp" +while true; do + case $1 in + -f|--force) force=true ;; + -v|--verbose) verbose=true ;; + -h|--help) usage ;; + --) shift; break ;; + *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;; + esac + shift +done + +readonly verbose force + +if (( $# )); then + all_vols=( "$@" ) +else + all_vols=(q a o i qd qr) + ar_snaps=(/mnt/root/btrbk/ar.*) + if [[ -e /mnt/root/ar ]] || (( ${#ar_snaps[@]} > 0 )); then + all_vols+=(ar) fi fi +##### end command line parsing ######## -for dir in /mnt/r7/amy/{root/root,boot/boot}_ubuntubionic /mnt/{root2/root,boot2/boot}_ubuntubionic; do - vol=${dir##*/} - root_dir=${dir%/*} - if [[ ! -d $root_dir ]]; then - # this only exists on one host. - continue - fi - # if latest is already mounted, make sure binds are mounted and move on - m check-subvol-stale -p $dir - # populated by check-subvol-stale if stale - if ! fresh_snap=$(cat /nocow/btrfs-stale/$vol 2>/dev/null); then - continue - fi - if [[ -d $dir ]]; then - kill_dirs=($dir) - if ! kill-dir TERM TERM TERM INT INT HUP HUP TERM TERM TERM INT INT HUP HUP; then - if $force; then kill-dir KILL; fi - fi - b btrfs sub del $dir - fi - b btrfs sub snapshot $fresh_snap $dir - rm -f /nocow/btrfs-stale/$vol -done +ret=0 +mkdir -p /var/log/btrbk + +setup-root2 +setup-fstab +prune-vols +bind-mismatch-fix +plan-umounts +if (( ${#umount_vols[@]} >= 1 )); then + umount-then-mount-latest +fi +amy-root2-latest if (( ret >= 1 )); then - echo "$0: exit status $ret. see error above" + echo "$0: exit status $ret. see error(s) above" >&2 fi exit $ret diff --git a/filesystem/usr/local/bin/umount-funcs b/filesystem/usr/local/bin/umount-funcs index 794dfcb..8e3f68c 100644 --- a/filesystem/usr/local/bin/umount-funcs +++ b/filesystem/usr/local/bin/umount-funcs @@ -9,6 +9,7 @@ if ! type -p m &>/dev/null; then } fi get-my-pids() { + local p count loop_limit if (( ${#my_pids[@]} )); then return 0 fi @@ -25,11 +26,12 @@ get-my-pids() { done } pid-check() { + local p m get-my-pids - for p in ${pids}; do + for p in $pids; do for m in ${my_pids[@]}; do if (( p == m )); then - echo "$0: error: pids to kill includes our pid or a parent. ps output:" >&2 + echo "$0: error: cant kill pid: $p, same as our pid or a parent: $m. ps output:" >&2 ps -f -p $p exit 1 fi @@ -45,7 +47,12 @@ get-pids() { dir_regex="$dir_regex|${kill_dirs[$i]}" fi done - pids=$(timeout 15 lsof -XF n | awk 'BEGIN { RS = "\np" }; {if($1 in printed_pids) next; for(i=2;i<=NF;i++){ if($i ~ "^n('$dir_regex')(/|$)") { printed_pids[$1]=1; printf("%s ", $1); break} } }') + # Note: perhaps this might catch additional processes: fuser -m + # $dir. However, I know this lsof catches things that doesn't. + + # Note on -b: I read the lsof manual, and it doesn't sound like we are doing + # -anything that -b prevents, but I'm not 100% sure. + pids=$(timeout 15 lsof -wbXF n | awk 'BEGIN { RS = "\np" }; {if($1 in printed_pids) next; for(i=2;i<=NF;i++){ if($i ~ "^n('$dir_regex')(/|$)") { printed_pids[$1]=1; printf("%s ", $1); break} } }') } get-pids-debug() { @@ -66,7 +73,13 @@ get-pids-debug() { } kill-dir() { - local pids i + local -a sigs + local pids i sig first_pid + + if ! (( ${#kill_dirs[@]} >= 1 )); then + echo "kill-dir: error: ${kill_dirs[@]} is empty!" + exit 1 + fi sigs=("$@") for (( i=0; i<= $#; i++ )); do @@ -97,43 +110,39 @@ kill-dir() { done } -umount-try() { - local ret=0 - umounted=false - m umount -R $1 || ret=$? - if (( ret == 0 )); then - umounted=true - unmounted+=($dir) - fi - return $ret -} # leaf function. others are just used by this one. umount-kill() { local -a kill_dirs - for dir; do - if ! mountpoint -q $dir; then + local umount_kill_dir killed_dir + for umount_kill_dir; do + if ! mountpoint -q $umount_kill_dir; then echo "expected mountpoint args, got $dir" return 1 fi done - for dir; do - if ! umount-try $dir; then - kill_dirs+=($dir) + for umount_kill_dir; do + if ! m umount -R $umount_kill_dir; then + kill_dirs+=($umount_kill_dir) fi done + if (( ${#kill_dirs[@]} == 0 )); then + return 0 + fi + if ! kill-dir TERM TERM TERM INT INT HUP HUP TERM TERM TERM INT INT HUP HUP; then if [[ $force ]] && $force; then kill-dir KILL; fi fi - # If we fail, consider trying this to see if it catches more processes: - # fuser -m $dir - umount-try $dir ||: - if ! $umounted; then - echo "$0: failed to umount $dir" - umount_ret=false - ret=1 - fi + + umount_ret=true + for killed_dir in ${kill_dirs[@]}; do + if ! m umount -R $killed_dir; then + echo "$0: ERROR: failed to umount $dir." >&2 + umount_ret=false + ret=1 + fi + done } -- 2.30.2