X-Git-Url: https://iankelling.org/git/?p=distro-setup;a=blobdiff_plain;f=btrfsmaint;h=c410b7b3503359309ab6938f5c233f7193cd6a3e;hp=6c7dbb1f6788119cafa9cac1dc2cae559cd2c07b;hb=HEAD;hpb=b18dade73dedfe69aa741f8417947d83c4208f2d diff --git a/btrfsmaint b/btrfsmaint index 6c7dbb1..85f2f34 100755 --- a/btrfsmaint +++ b/btrfsmaint @@ -1,102 +1,188 @@ #!/bin/bash +# I, Ian Kelling, follow the GNU license recommendations at +# https://www.gnu.org/licenses/license-recommendations.en.html. They +# recommend that small programs, < 300 lines, be licensed under the +# Apache License 2.0. This file contains or is part of one or more small +# programs. If a small program grows beyond 300 lines, I plan to switch +# its license to GPL. + +# Copyright 2024 Ian Kelling + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + [[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@" -source /a/bin/errhandle/err +set -e; . /usr/local/lib/bash-bear; set +e # inspired from # https://github.com/kdave/btrfsmaintenance +if [[ $INVOCATION_ID ]]; then + err-cleanup() { + exim -odf -i root </dev/null || return 0 + type -p xscreensaver-command &>/dev/null || return 0 + export XAUTHORITY=/home/iank/.Xauthority export DISPLAY=:0 - # a hours, a movie could run that long. - idle_limit=$((1000 * 60 * 60 * 2)) - idle_time=$idle_limit - while read -r user; do - new_idle_time=$(sudo -u $user xprintidle 2>/dev/null) ||: - if [[ $new_idle_time && $new_idle_time -lt $idle_time ]]; then - idle_time=$new_idle_time + locked=false + if lock_info=$(xscreensaver-command -time); then + if [[ $lock_info != *non-blanked* ]]; then + locked=true fi - done < <(users | tr " " "\n" | sort -u) - if (( idle_time < idle_limit )); then - idle=false else - idle=true + locked=true fi } - usage() { cat <&2; exit 1; } -force=false check=false -if [[ $1 ]]; then +dryrun=false +force=false +stats=true + +temp=$(getopt -l help,check,dryrun,force,no-stats h "$@") || usage 1 +eval set -- "$temp" +while true; do case $1 in - check) - check=true - ;; - force) - force=true - ;; - *) - echo "$0: error: unexpected arg" >&2 - usage 1 - ;; + --check) check=true ;; + --dryrun) dryrun=true ;; + --force) force=true ;; + --no-stats) stats=false ;; + -h|--help) usage ;; + --) shift; break ;; + *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;; esac -fi + shift +done +readonly check dryrun force stats +##### end command line parsing ######## main() { - idle=true if ! $force; then check-idle + if ! $check; then + min=0 + max_min=300 + # When the cron kicks in, we may not be idle (physically sleeping) yet, so + # wait. + while ! $locked && (( min < max_min )); do + min=$(( min + 1 )) + sleep 60 + check-idle + done + # If we've waited a really long time for idle, just give up. + if (( min == max_min )); then + return + fi + fi fi - tmp=$(mktemp) fnd="findmnt --types btrfs --noheading" for x in $($fnd --output "SOURCE" --nofsroot | sort -u); do mnt=$($fnd --output "TARGET" --first-only --source $x) [[ $mnt ]] || continue - if ! btrfs dev stats -c $mnt >$tmp; then - if diff -q $mnt/btrfs-dev-stats $tmp; then - diff -u $mnt/btrfs-dev-stats $tmp | mail -s "$HOSTNAME: error: btrfs dev stats -c $mnt" root@localhost - cat $tmp >$mnt/btrfs-dev-stats + #### begin look for diff in stats, eg: increasing error count #### + if $stats; then + tmp=$(mktemp) + # ${mnt%/} so that if mnt is / we avoid making a buggy looking path + stats_path=${mnt%/}/btrfs-dev-stats + if [[ ! -e $stats_path ]]; then + btrfs dev stats -c $mnt >$stats_path ||: # populate initial reading + elif ! btrfs dev stats -c $mnt >$tmp; then + if ! diff -q $stats_path $tmp; then + mv $stats_path $stats_path.1 + cat $tmp >$stats_path + diff=$(diff -u $stats_path $tmp 2>&1 ||:) + printf "diff of: btrfs dev stats -c %s\n%s\n" "$mnt" "$diff" + exim -odf -i root </dev/null ||: - continue - fi if $check; then + if ! $locked; then + if $dryrun; then + echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt" + else + btrfs scrub cancel $mnt &>/dev/null ||: + fi + fi continue fi @@ -118,25 +204,42 @@ main() { e ionice -c 3 btrfs balance start -musage=$usage $mnt done fi - # e btrfs filesystem df $mnt - # e df -H $mnt - date=$( - btrfs scrub status $mnt | \ - sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' - ) - if [[ $date ]]; then + date= + scrub_status=$(btrfs scrub status $mnt) + if printf "%s\n" "$scrub_status" | grep -i '^status:[[:space:]]*finished$' &>/dev/null; then + date=$(printf "%s\n" "$scrub_status" | sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p') + fi + if [[ ! $date ]]; then + # output from older versions, at least btrfs v4.15.1 + date=$( + printf "%s\n" "$scrub_status" | \ + sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' + ) + fi + if ! $force && [[ $date ]]; then + if $dryrun; then + echo "$0: last scrub finish for $mnt: $date" + fi date=$(date --date="$date" +%s) - # if date is sooner than 90 days ago + # if date is sooner than 60 days ago # the wiki recommends 30 days or so, but - # it makes the comp lag like shit for a day, - # so I'm going with 90 days. - if (( date > $(date +%s) - 60*60*24*30 )); then - echo "cron: skiping scrub of $mnt" + # I'm going with 60 days. + if (( date > EPOCHSECONDS - 60*60*24*60 )); then + if $dryrun; then + echo "$0: skiping scrub of $mnt, last was $(( (EPOCHSECONDS - date) / 60/60/24 )) days ago, < 30 days" + fi continue fi fi - # -c 2 -n 4 is from btrfsmaintenance, does ionice - e btrfs scrub start -Bd -c 2 -n 4 $mnt + # btrfsmaintenance does -c 2 -n 4, but I want lowest pri. + e btrfs scrub start -Bd -c 3 $mnt + + # We normally only do one disk since this is meant to be run in + # downtime and if we try to do all disks, we invariably end up doing + # a scrub after downtime. So, just do one disk per day. + if ! $force; then + return 0 + fi done }