iankelling.org Git - distro-setup/blob - btrfsmaint

   1 #!/bin/bash
   2
   3
   4 [[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"
   5
   6 source /a/bin/errhandle/err
   7
   8 # inspired from
   9 # https://github.com/kdave/btrfsmaintenance
  10
  11
  12 # Man page says we could also use a range, i suppose it would be
  13 # logical to use a pattern like 5..10 10..20,
  14 # but I don't know if this would help us at all.
  15 dusage="5 10"
  16 musage="5"
  17
  18 e() {
  19   echo "cron: $*"
  20   if ! $dryrun; then
  21     "$@"
  22   fi
  23 }
  24
  25 check-idle() {
  26   type -p xprintidle &>/dev/null || return 0
  27   export DISPLAY=:0
  28   # a hours, a movie could run that long.
  29   idle_limit=$((1000 * 60 * 60 * 2))
  30   idle_time=$idle_limit
  31   while read -r user; do
  32     new_idle_time=$(sudo -u $user xprintidle 2>/dev/null) ||:
  33     if [[ $new_idle_time && $new_idle_time -lt $idle_time ]]; then
  34       idle_time=$new_idle_time
  35     fi
  36   done < <(users | tr " " "\n" | sort -u)
  37   if (( idle_time < idle_limit )); then
  38     idle=false
  39   else
  40     idle=true
  41   fi
  42 }
  43
  44 usage() {
  45   cat <<EOF
  46 Usage: ${0##*/} [ARGS]
  47 Do btrfs maintence or stop if xprintidle shows a user
  48
  49 force  Run regardless of user idle status on all disks.
  50 check  Only check if an existing maintence should be cancelled due to
  51          nonidle user. Also, runs in a loop every 20 seconds for 10
  52          minutes.
  53
  54 Note: Uses util-linux getopt option parsing: spaces between args and
  55 options, short options can be combined, options before args.
  56 EOF
  57   exit $1
  58 }
  59
  60
  61 force=false
  62 check=false
  63 dryrun=false
  64 if [[ $1 ]]; then
  65   case $1 in
  66     check)
  67       check=true
  68       ;;
  69     force)
  70       force=true
  71       ;;
  72     dryrun)
  73       dryrun=true
  74       ;;
  75     *)
  76       echo "$0: error: unexpected arg" >&2
  77       usage 1
  78       ;;
  79   esac
  80 fi
  81
  82
  83 main() {
  84   idle=true
  85   if ! $force; then
  86     check-idle
  87     if ! $check; then
  88       min=0
  89       max_min=300
  90       # When the cron kicks in, we may not be idle (physically sleeping) yet, so
  91       # wait.
  92       while ! $idle && (( min < max_min )); do
  93         min=$(( min + 1 ))
  94         sleep 60
  95         check-idle
  96       done
  97       # If we've waited a really long time for idle, just give up.
  98       if (( min == max_min )); then
  99         return
 100       fi
 101     fi
 102   fi
 103
 104
 105   fnd="findmnt --types btrfs --noheading"
 106   for x in $($fnd --output "SOURCE" --nofsroot | sort -u); do
 107     mnt=$($fnd --output "TARGET" --first-only --source $x)
 108     [[ $mnt ]] || continue
 109
 110     #### begin look for diff in stats, eg: increasing error count ####
 111
 112     # Only run for $check, since it runs in parallel to non-check, avoid
 113     # race condition.
 114     if $check; then
 115       tmp=$(mktemp)
 116       # if mnt is /, avoid making a buggy looking path
 117       stats_path=${mnt%/}/btrfs-dev-stats
 118       if [[ ! -e $stats_path ]]; then
 119         btrfs dev stats -c $mnt >$stats_path ||: # populate initial reading
 120       elif ! btrfs dev stats -c $mnt >$tmp; then
 121         if ! diff -q $stats_path $tmp; then
 122               exim -t <<EOF
 123 From: root@$HOSTNAME.b8.nz
 124 To: alerts@iankelling.org
 125 Subject: btrfsmaintstop: btrfs dev stats -c $mnt
 126
 127 $(diff -u $stats_path $tmp)
 128 EOF
 129           mv $stats_path $stats_path.1
 130           cat $tmp >$stats_path
 131         fi
 132       fi
 133       rm -f $tmp
 134     fi
 135     #### end look for diff in stats, eg: increasing error count ####
 136
 137     if $check; then
 138       if ! $idle; then
 139         if $dryrun; then
 140           echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
 141         else
 142           btrfs scrub cancel $mnt &>/dev/null ||:
 143         fi
 144       fi
 145       continue
 146     fi
 147
 148     # for comparing before and after balance.
 149     # the log is already fairly verbose, so commented.
 150     # e btrfs filesystem df $mnt
 151     # e df -H $mnt
 152     if btrfs filesystem df $mnt | grep -q "Data+Metadata"; then
 153       for usage in $dusage; do
 154         e ionice -c 3 btrfs balance start -dusage=$usage -musage=$usage $mnt
 155       done
 156     else
 157       e ionice -c 3 btrfs balance start -dusage=0 $mnt
 158       for usage in $dusage; do
 159         e ionice -c 3 btrfs balance start -dusage=$usage $mnt
 160       done
 161       e ionice -c 3 btrfs balance start -musage=0 $mnt
 162       for usage in $musage; do
 163         e ionice -c 3 btrfs balance start -musage=$usage $mnt
 164       done
 165     fi
 166     date=
 167     scrub_status=$(btrfs scrub status $mnt)
 168     if printf "%s\n" "$scrub_status" | grep -i '^status:[[:space:]]*finished$' &>/dev/null; then
 169       date=$(printf "%s\n" "$scrub_status" | sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p')
 170     fi
 171     if [[ ! $date ]]; then
 172       # output from older versions, at least btrfs v4.15.1
 173       date=$(
 174         printf "%s\n" "$scrub_status" | \
 175           sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p'
 176           )
 177     fi
 178     if [[ $date ]]; then
 179       if $dryrun; then
 180         echo "$0: last scrub finish for $mnt: $date"
 181       fi
 182       date=$(date --date="$date" +%s)
 183       # if date is sooner than 60 days ago
 184       # the wiki recommends 30 days or so, but
 185       # I'm going with 60 days.
 186       if (( date > EPOCHSECONDS - 60*60*24*60 )); then
 187         if $dryrun; then
 188           echo "$0: skiping scrub of $mnt, last was $(( (EPOCHSECONDS - date) / 60/60/24 )) days ago, < 30 days"
 189         fi
 190         continue
 191       fi
 192     fi
 193     # -c 2 -n 4 is from btrfsmaintenance, does ionice
 194     e btrfs scrub start -Bd -c 2 -n 4 $mnt
 195
 196     # We normally only do one disk since this is meant to be run while I sleep
 197     # and if we try to do all disks, we invariably end up doing a scrub still
 198     # after I've woken up. So, just do one per day.
 199     if ! $force; then
 200       return 0
 201     fi
 202   done
 203 }
 204
 205 loop-main() {
 206   while true; do
 207     main
 208     sleep 60
 209   done
 210 }
 211
 212 if $check; then
 213   loop-main
 214 else
 215   main
 216 fi