From: Ian Kelling Date: Mon, 30 Sep 2024 23:24:01 +0000 (-0400) Subject: refactor to run single lsof that is slower but more robust X-Git-Url: https://iankelling.org/git/?a=commitdiff_plain;h=6ca23a68da3b26ee590cbdd70739f965d85a926f;p=distro-setup refactor to run single lsof that is slower but more robust --- diff --git a/filesystem/usr/local/bin/mount-latest-subvol b/filesystem/usr/local/bin/mount-latest-subvol index c7f2971..fd2aaf4 100755 --- a/filesystem/usr/local/bin/mount-latest-subvol +++ b/filesystem/usr/local/bin/mount-latest-subvol @@ -242,12 +242,8 @@ fi ##### end setup fstab for subvols we care about ###### -for vol in ${all_vols[@]}; do +vol-setup() { d=/$vol - if ! awk '$3 == "btrfs" {print $2}' /etc/fstab | grep -xF $d &>/dev/null; then - continue - fi - ##### begin building up list of bind mounts ###### binds=() # list of bind mounts @@ -266,29 +262,16 @@ for vol in ${all_vols[@]}; do roots=( ${new_roots[@]} ) done ##### end building up list of bind mounts ###### +} - - # if latest is already mounted, make sure binds are mounted and move on - m check-subvol-stale $d - # populated by check-subvol-stale if stale - if ! fresh_snap=$(cat /nocow/btrfs-stale/$vol 2>/dev/null); then - mnt $d - did=$(stat -c%d $d) - for b in ${binds[@]}; do - if mountpoint -q $b; then - bid=$(stat -c%d $b) - if [[ $did != "$bid" ]]; then - umount-kill $b - fi - fi - mnt $b - done +### begin pruning volumes ### +pruned_vols=() +for vol in ${all_vols[@]}; do + if ! awk '$3 == "btrfs" {print $2}' /etc/fstab | grep -xF /$vol &>/dev/null; then continue fi - - - ##### begin checking for loopback mounts #### + ##### begin checking for loopback mounts ##### found_loop=false for l in $(losetup -ln|awk '{print $6}'); do for dir in $d ${binds[@]}; do @@ -306,126 +289,155 @@ for vol in ${all_vols[@]}; do if $found_loop; then continue fi - ##### end end checking loopback mounts #### + ##### end end checking loopback mounts ##### + pruned_vols+=("$vol") +done +### end pruning volumes ### - ## not using arbtt at the moment - # if [[ $vol == q ]]; then - # ## allow to fail, user might not be logged in - # x sudo -u $(id -nu 1000) XDG_RUNTIME_DIR=/run/user/1000 systemctl --user stop arbtt ||: - # fi - umount_ret=true - unmounted=() - for dir in $(echo $d ${binds[*]}\ |tac -s\ ); do - umount-kill $dir - done +umount_dirs=() +declare -A umount_vols - # if we unmounted some but not all, restore them and move on - if ! $umount_ret; then - for dir in ${unmounted[@]}; do - mnt $dir - done - continue - fi - #### begin dealing with leaf vols #### +for vol in ${pruned_vols[@]}; do + vol-setup - ### begin getting root_dir - ### this is duplicated in check-subvol-stale - - dev=$(sed -rn "s,^\s*([^#]\S*)\s+$d\s.*,\1,p" /etc/fstab /etc/mtab|head -n1) - d dev=$dev - # note, we need $dev because $d might not be mounted, and we do this loop - # because the device in fstab for the rootfs can be different. - for devx in $(btrfs fil show $dev| sed -rn 's#.*path (\S+)$#\1#p'); do - if [[ $devx == dm-* ]]; then - devx=/dev/$devx - mapper-dev devx - fi - d devx=$devx - root_dir=$(sed -rn "s,^\s*$devx\s+(\S+).*\bsubvolid=[05]\b.*,\1,p" /etc/mtab /etc/fstab|head -n1) - if [[ $root_dir ]]; then - d root_dir=$root_dir - break - fi - done - if [[ ! $root_dir ]]; then - echo "$0: error could not find root subvol mount for $dev" >&2 - exit 1 + # if latest is already mounted, make sure binds are mounted and move on + m check-subvol-stale $d + # populated by check-subvol-stale if stale + if [[ -s /nocow/btrfs-stale/$vol ]]; then + for b in ${binds[@]}; do + # note: if we ever did binds of binds, the ordering of umount_dirs + # would need to be reversed here. + if mountpoint -q $b; then + umount_dirs+=($b) + fi + done + umount_dirs+=($d) + umount_vols[$vol]=t + else + mnt $d + did=$(stat -c%d $d) + for b in ${binds[@]}; do + if mountpoint -q $b; then + bid=$(stat -c%d $b) + if [[ $did != "$bid" ]]; then + umount_dirs+=($b) + umount_vols[$vol]=t + fi + else + mnt $b + fi + done fi - ### end getting root_dir - - cd $root_dir - if [[ -e $vol ]]; then - if [[ $vol == qd ]]; then - b btrfs sub del qd - else - leaf=$vol.leaf.$(date +%Y-%m-%dT%H:%M:%S%z) - b mv $vol $leaf - b btrfs property set -ts $leaf ro true - - ### begin check if leaf is different, delete it if not ### - parentid=$(btrfs sub show $leaf | awk '$1 == "Parent" && $2 == "UUID:" {print $3}') - bsubs=(btrbk/$vol.*) - bsub= # base subvolume - # go in reverse order as its more likely to be at the end - for ((i=${#bsubs[@]}-1; i>=0; i--)); do - if [[ $parentid == $(btrfs sub show ${bsubs[i]} | awk '$1 == "UUID:" {print $2}') ]]; then - bsub=${bsubs[i]} +done + +if (( ${#umount_dirs[@]} >= 1 )); then + umount_ret=true + unmounted=() + umount-kill ${umount_dirs[@]} + + if $umount_ret; then + for vol in ${!umount_vols[@]}; do + vol-setup + fresh_snap=$(cat /nocow/btrfs-stale/$vol 2>/dev/null) + + #### begin dealing with leaf vols #### + + ### begin getting root_dir + ### this is duplicated in check-subvol-stale + + dev=$(sed -rn "s,^\s*([^#]\S*)\s+$d\s.*,\1,p" /etc/fstab /etc/mtab|head -n1) + d dev=$dev + # note, we need $dev because $d might not be mounted, and we do this loop + # because the device in fstab for the rootfs can be different. + for devx in $(btrfs fil show $dev| sed -rn 's#.*path (\S+)$#\1#p'); do + if [[ $devx == dm-* ]]; then + devx=/dev/$devx + mapper-dev devx + fi + d devx=$devx + root_dir=$(sed -rn "s,^\s*$devx\s+(\S+).*\bsubvolid=[05]\b.*,\1,p" /etc/mtab /etc/fstab|head -n1) + if [[ $root_dir ]]; then + d root_dir=$root_dir break fi done - if [[ $bsub ]]; then - # in testing, same subvol is 136 bytes. allow some overhead. 32 happens sometimes under systemd. - # $ errno 32 - # EPIPE 32 Broken pipe - lines=$(btrfs send --no-data -p $bsub $leaf | btrfs receive --dump | head -n 100 | wc -l || [[ $? == 141 || ${PIPESTATUS[0]} == 32 ]]) - if [[ $lines == 0 ]]; then - # example output of no differences: - # snapshot ./qrtest uuid=c41ff6b7-0527-f34d-95ac-190eecf54ff5 transid=2239 parent_uuid=64949e1b-4a3e-3945-9a8e-cd7b7c15d7d6 parent_transid=2239 - echo suspected identical: $bsub $leaf - b btrfs sub del $leaf - fi + if [[ ! $root_dir ]]; then + echo "$0: error could not find root subvol mount for $dev" >&2 + exit 1 fi - ### end check if leaf is different, delete it if not ### - - ## begin expire leaf vols ## - leaf_vols=($vol.leaf.*) - count=${#leaf_vols[@]} - leaf_limit_time=$(( EPOCHSECONDS - 60*60*24*60 )) # 60 days - leaf_new_limit_time=$(( EPOCHSECONDS - 60*60*24 * 5 )) # 5 days this - # goes backwards from oldest. leaf_new_limit_time is a safety - # measure to ensure we don't delete very recent leafs. - for leaf in ${leaf_vols[@]}; do - leaf_time=$(date -d ${leaf#"$vol".leaf.} +%s) - if (( leaf_limit_time > leaf_time || ( leaf_new_limit_time > leaf_time && count > 30 ) )); then - b btrfs sub del $leaf + ### end getting root_dir + + cd $root_dir + if [[ -e $vol ]]; then + if [[ $vol == qd ]]; then + b btrfs sub del qd + else + leaf=$vol.leaf.$(date +%Y-%m-%dT%H:%M:%S%z) + b mv $vol $leaf + b btrfs property set -ts $leaf ro true + + ### begin check if leaf is different, delete it if not ### + parentid=$(btrfs sub show $leaf | awk '$1 == "Parent" && $2 == "UUID:" {print $3}') + bsubs=(btrbk/$vol.*) + bsub= # base subvolume + # go in reverse order as its more likely to be at the end + for ((i=${#bsubs[@]}-1; i>=0; i--)); do + if [[ $parentid == $(btrfs sub show ${bsubs[i]} | awk '$1 == "UUID:" {print $2}') ]]; then + bsub=${bsubs[i]} + break + fi + done + if [[ $bsub ]]; then + # in testing, same subvol is 136 bytes. allow some overhead. 32 happens sometimes under systemd. + # $ errno 32 + # EPIPE 32 Broken pipe + lines=$(btrfs send --no-data -p $bsub $leaf | btrfs receive --dump | head -n 100 | wc -l || [[ $? == 141 || ${PIPESTATUS[0]} == 32 ]]) + if [[ $lines == 0 ]]; then + # example output of no differences: + # snapshot ./qrtest uuid=c41ff6b7-0527-f34d-95ac-190eecf54ff5 transid=2239 parent_uuid=64949e1b-4a3e-3945-9a8e-cd7b7c15d7d6 parent_transid=2239 + echo suspected identical: $bsub $leaf + b btrfs sub del $leaf + fi + fi + ### end check if leaf is different, delete it if not ### + + ## begin expire leaf vols ## + leaf_vols=($vol.leaf.*) + count=${#leaf_vols[@]} + leaf_limit_time=$(( EPOCHSECONDS - 60*60*24*60 )) # 60 days + leaf_new_limit_time=$(( EPOCHSECONDS - 60*60*24 * 5 )) # 5 days this + # goes backwards from oldest. leaf_new_limit_time is a safety + # measure to ensure we don't delete very recent leafs. + for leaf in ${leaf_vols[@]}; do + leaf_time=$(date -d ${leaf#"$vol".leaf.} +%s) + if (( leaf_limit_time > leaf_time || ( leaf_new_limit_time > leaf_time && count > 30 ) )); then + b btrfs sub del $leaf + fi + count=$((count-1)) + done fi - count=$((count-1)) + ## end expire leaf vols ## + fi + #### end dealing with leaf vols #### + + # Note, we make a few assumptions in this script, like + # $d was not a different subvol id than $vol, and + # things otherwise didn't get mounted very strangely. + b btrfs sub snapshot $fresh_snap $vol + for dir in $d ${binds[@]}; do + m mnt $dir done - fi - ## end expire leaf vols ## + rm -f /nocow/btrfs-stale/$vol + done + else + # If we unmounted some but not all, restore them. + for dir in ${unmounted[@]}; do + mnt $dir + done fi - #### end dealing with leaf vols #### - - # Note, we make a few assumptions in this script, like - # $d was not a different subvol id than $vol, and - # things otherwise didn't get mounted very strangely. - b btrfs sub snapshot $fresh_snap $vol - for dir in $d ${binds[@]}; do - m mnt $dir - done - - ## arbtt disabled for now - # if [[ $vol == q ]]; then - # # maybe this will fail if X is not running - # x sudo -u $(id -nu 1000) XDG_RUNTIME_DIR=/run/user/1000 systemctl --user start arbtt ||: - # fi - - stale_dir=/nocow/btrfs-stale - rm -f $stale_dir/$d -done - +fi for dir in /mnt/r7/amy/{root/root,boot/boot}_ubuntubionic /mnt/{root2/root,boot2/boot}_ubuntubionic; do diff --git a/filesystem/usr/local/bin/umount-funcs b/filesystem/usr/local/bin/umount-funcs index f45c6f3..29f0ddf 100644 --- a/filesystem/usr/local/bin/umount-funcs +++ b/filesystem/usr/local/bin/umount-funcs @@ -36,29 +36,36 @@ pid-check() { done done } -kill-dir() { - for sig; do - echo kill-dir $sig - found_pids=false - if pids=$(timeout 4 lsof -t $dir); then - found_pids=true - timeout 4 lsof -w $dir - pid-check - kill -$sig $pids - fi - # fuser will find open sockets that lsof won't, for example from gpg-agent. - # note: -v shows kernel processes, which then doesn't return true when we want - if pids=$(timeout 4 fuser -m $dir 2>/dev/null); then - pid-check - found_pids=true - fuser -$sig -mvk $dir +get-pids() { + local i dir_regex + for (( i=0; i < ${#kill_dirs[@]}; i++ )); do + if (( i == 0 )); then + dir_regex="${kill_dirs[$i]}" + else + dir_regex="$dir_regex|${kill_dirs[$i]}" fi - sleep .8 - if ! $found_pids; then + done + pids=$(timeout 30 lsof -XF n | awk 'BEGIN { RS = "\np" }; {if($1 in printed_pids) next; for(i=2;i<=NF;i++){ if($i ~ "^n('$dir_regex')(/|$)") { printed_pids[$1]=1; printf("%s ", $1); break} } }') +} + +kill-dir() { + local pids i + + sigs=("$@") + for (( i=0; i<= $#; i++ )); do + get-pids + if [[ ! $pids ]]; then return 0 fi + # ran out of signals to try: + if (( i == $# )); then + return 1 + fi + sig=${sigs[$i]} + pid-check + echo kill-dir $sig + kill -$sig $pids done - return 1 } umount-try() { @@ -74,27 +81,30 @@ umount-try() { # leaf function. others are just used by this one. umount-kill() { - dir=$1 - if mountpoint -q $dir; then - if ! umount-try $dir; then - if ! kill-dir TERM TERM TERM INT INT HUP HUP TERM TERM TERM INT INT HUP HUP; then - if [[ $force ]] && $force; then kill-dir KILL; fi - fi + local -a kill_dirs + for dir; do + if ! mountpoint -q $dir; then + echo "expected mountpoint args, got $dir" + return 1 + fi + done - if ! umount-try $dir; then - # 2024-08, i've been noticing emacs randomly holding open /a - # without its pid being found by fuser or lsof. - # i'm on commit b9da5ee06f2e4ae807336dd6a641ae797831d097 - if m pkill -xf 'emacs --daemon'; then - sleep 2 - umount-try $dir ||: - fi - fi - if ! $umounted; then - echo "$0: failed to umount $dir" - umount_ret=false - ret=1 - fi + for dir; do + if ! umount-try $dir; then + kill_dirs+=($dir) fi + done + + if ! kill-dir TERM TERM TERM INT INT HUP HUP TERM TERM TERM INT INT HUP HUP; then + if [[ $force ]] && $force; then kill-dir KILL; fi + fi + + # If we fail, consider trying this to see if it catches more processes: + # fuser -m $dir + umount-try $dir ||: + if ! $umounted; then + echo "$0: failed to umount $dir" + umount_ret=false + ret=1 fi }