iankelling.org Git - distro-setup/blob - btrfsmaint

   1 #!/bin/bash
   2
   3
   4 [[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"
   5
   6 set -e; . /usr/local/lib/err; set +e
   7
# Inspired by
# https://github.com/kdave/btrfsmaintenance
  10
  11 if [[ $INVOCATION_ID ]]; then
  12   err-cleanup() {
  13     exim -odf -i root <<EOF
  14 From: root@$(hostname -f)
  15 To: root@$(hostname -f)
  16 Subject: btrfsmaint automatically exited on command error
  17
  18 journalctl -u btrfsmaint -n 50:
  19 $(journalctl -u btrfsmaint -n 50)
  20 EOF
  21   }
  22 fi
  23
  24 dusage="5 10"
  25 musage="5"
  26
  27 e() {
  28   echo "btrfsmaint: $*"
  29   if ! $dryrun; then
  30     "$@"
  31   fi
  32 }
  33
  34 check-idle() {
  35   type -p xprintidle &>/dev/null || return 0
  36   export DISPLAY=:0
  37   # a hours, a movie could run that long.
  38   idle_limit=$((1000 * 60 * 60 * 2))
  39   idle_time=$idle_limit
  40   while read -r user; do
  41     new_idle_time=$(sudo -u $user xprintidle 2>/dev/null) ||:
  42     if [[ $new_idle_time && $new_idle_time -lt $idle_time ]]; then
  43       idle_time=$new_idle_time
  44     fi
  45   done < <(users | tr " " "\n" | sort -u)
  46   if (( idle_time < idle_limit )); then
  47     idle=false
  48   else
  49     idle=true
  50   fi
  51 }
  52
  53 usage() {
  54   cat <<EOF
  55 Usage: ${0##*/} [OPTIONS]
  56 Do btrfs maintence or stop if we have X and xprintidle shows a user
  57
  58 Normally, no options are needed.
  59
  60 --check  Only check if an existing maintence should be cancelled due to
  61          nonidle user and run in a loop every 20 seconds for 10
  62          minutes.
  63
  64 --dryrun Just print out what we would do.
  65
  66 --force  Run regardless of user idle status on all disks and do scrub
  67          regardless of when it was last run.
  68 --no-stats  Avoid checking error statistics. Use this to avoid a rare race
  69             condition when running --check concurrently with normal run.
  70
  71
  72 -h|--help   Show help
  73
  74 Note: Uses util-linux getopt option parsing: spaces between args and
  75 options, short options can be combined, options before args.
  76 EOF
  77   exit $1
  78 }
  79
  80 ##### begin command line parsing ########
  81
  82 # ensure we can handle args with spaces or empty.
  83 ret=0; getopt -T || ret=$?
  84 [[ $ret == 4 ]] || { echo "Install util-linux for enhanced getopt" >&2; exit 1; }
  85
  86 check=false
  87 dryrun=false
  88 force=false
  89 stats=true
  90
  91 temp=$(getopt -l help,check,dryrun,force,no-stats h "$@") || usage 1
  92 eval set -- "$temp"
  93 while true; do
  94   case $1 in
  95     --check) check=true ;;
  96     --dryrun) dryrun=true ;;
  97     --force) force=true ;;
  98     --no-stats) stats=false ;;
  99     -h|--help) usage ;;
 100     --) shift; break ;;
 101     *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;;
 102   esac
 103   shift
 104 done
 105 readonly check dryrun force stats
 106 ##### end command line parsing ########
 107
 108
 109 main() {
 110   idle=true
 111   if ! $force; then
 112     check-idle
 113     if ! $check; then
 114       min=0
 115       max_min=300
 116       # When the cron kicks in, we may not be idle (physically sleeping) yet, so
 117       # wait.
 118       while ! $idle && (( min < max_min )); do
 119         min=$(( min + 1 ))
 120         sleep 60
 121         check-idle
 122       done
 123       # If we've waited a really long time for idle, just give up.
 124       if (( min == max_min )); then
 125         return
 126       fi
 127     fi
 128   fi
 129
 130
 131   fnd="findmnt --types btrfs --noheading"
 132   for x in $($fnd --output "SOURCE" --nofsroot | sort -u); do
 133     mnt=$($fnd --output "TARGET" --first-only --source $x)
 134     [[ $mnt ]] || continue
 135
 136     #### begin look for diff in stats, eg: increasing error count ####
 137     if $stats; then
 138       tmp=$(mktemp)
 139       # ${mnt%/} so that if mnt is / we avoid making a buggy looking path
 140       stats_path=${mnt%/}/btrfs-dev-stats
 141       if [[ ! -e $stats_path ]]; then
 142         btrfs dev stats -c $mnt >$stats_path ||: # populate initial reading
 143       elif ! btrfs dev stats -c $mnt >$tmp; then
 144         if ! diff -q $stats_path $tmp; then
 145           mv $stats_path $stats_path.1
 146           cat $tmp >$stats_path
 147           diff=$(diff -u $stats_path $tmp 2>&1 ||:)
 148           printf "diff of: btrfs dev stats -c %s\n%s\n" "$mnt" "$diff"
 149           exim -odf -i root <<EOF
 150 From: root@$(hostname -f)
 151 To: root@$(hostname -f)
 152 Subject: btrfsmaint: device stats changed for $mnt
 153
 154 diff of: btrfs dev stats -c $mnt
 155 $diff
 156 EOF
 157         fi
 158       fi
 159       rm -f $tmp
 160     fi
 161     #### end look for diff in stats, eg: increasing error count ####
 162
 163     if $check; then
 164       if ! $idle; then
 165         if $dryrun; then
 166           echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
 167         else
 168           btrfs scrub cancel $mnt &>/dev/null ||:
 169         fi
 170       fi
 171       continue
 172     fi
 173
 174     # for comparing before and after balance.
 175     # the log is already fairly verbose, so commented.
 176     # e btrfs filesystem df $mnt
 177     # e df -H $mnt
 178     if btrfs filesystem df $mnt | grep -q "Data+Metadata"; then
 179       for usage in $dusage; do
 180         e ionice -c 3 btrfs balance start -dusage=$usage -musage=$usage $mnt
 181       done
 182     else
 183       e ionice -c 3 btrfs balance start -dusage=0 $mnt
 184       for usage in $dusage; do
 185         e ionice -c 3 btrfs balance start -dusage=$usage $mnt
 186       done
 187       e ionice -c 3 btrfs balance start -musage=0 $mnt
 188       for usage in $musage; do
 189         e ionice -c 3 btrfs balance start -musage=$usage $mnt
 190       done
 191     fi
 192     date=
 193     scrub_status=$(btrfs scrub status $mnt)
 194     if printf "%s\n" "$scrub_status" | grep -i '^status:[[:space:]]*finished$' &>/dev/null; then
 195       date=$(printf "%s\n" "$scrub_status" | sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p')
 196     fi
 197     if [[ ! $date ]]; then
 198       # output from older versions, at least btrfs v4.15.1
 199       date=$(
 200         printf "%s\n" "$scrub_status" | \
 201           sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p'
 202           )
 203     fi
 204     if ! $force && [[ $date ]]; then
 205       if $dryrun; then
 206         echo "$0: last scrub finish for $mnt: $date"
 207       fi
 208       date=$(date --date="$date" +%s)
 209       # if date is sooner than 60 days ago
 210       # the wiki recommends 30 days or so, but
 211       # I'm going with 60 days.
 212       if (( date > EPOCHSECONDS - 60*60*24*60 )); then
 213         if $dryrun; then
 214           echo "$0: skiping scrub of $mnt, last was $(( (EPOCHSECONDS - date) / 60/60/24 )) days ago, < 30 days"
 215         fi
 216         continue
 217       fi
 218     fi
 219     # btrfsmaintenance does -c 2 -n 4, but I want lowest pri.
 220     e btrfs scrub start -Bd -c 3 $mnt
 221
 222     # We normally only do one disk since this is meant to be run in
 223     # downtime and if we try to do all disks, we invariably end up doing
 224     # a scrub after downtime. So, just do one disk per day.
 225     if ! $force; then
 226       return 0
 227     fi
 228   done
 229 }
 230
 231 loop-main() {
 232   while true; do
 233     main
 234     sleep 60
 235   done
 236 }
 237
 238 if $check; then
 239   loop-main
 240 else
 241   main
 242 fi