# Re-execute this script through sudo (keeping the environment, -E) with the
# same arguments unless it is already running with effective UID 0.
[[ $EUID == 0 ]] || exec sudo -E "${BASH_SOURCE[0]}" "$@"
# The '-' line sourced /a/bin/errhandle/err; the '+' line sources
# /usr/local/lib/bash-bear instead, turning errexit on just before the source
# and off again right after it.
# NOTE(review): presumably bash-bear installs an error handler; its contents
# are not visible here - confirm.
-source /a/bin/errhandle/err
+set -e; . /usr/local/lib/bash-bear; set +e
# Inspired by:
# https://github.com/kdave/btrfsmaintenance
# Only when INVOCATION_ID is non-empty, define err-cleanup, which mails root
# (via exim) the last 50 journal lines of the btrfsmaint unit.
# NOTE(review): presumably INVOCATION_ID is the variable systemd sets for
# service invocations, and err-cleanup is a hook called by the sourced error
# handler; nothing in the visible text calls it - confirm both.
+if [[ $INVOCATION_ID ]]; then
+ err-cleanup() {
+ exim -odf -i root <<EOF
+From: root@$(hostname -f)
+To: root@$(hostname -f)
+Subject: btrfsmaint automatically exited on command error
+
+journalctl -u btrfsmaint -n 50:
+$(journalctl -u btrfsmaint -n 50)
+EOF
+ }
+fi
-# Man page says we could also use a range, i suppose it would be
-# logical to use a pattern like 5..10 10..20,
-# but I don't know if this would help us at all.
# NOTE(review): neither variable is referenced anywhere in the visible text.
# The names suggest btrfs balance usage filters (data / metadata) - confirm
# they are still used elsewhere, otherwise they are dead settings.
dusage="5 10"
musage="5"
# e CMD [ARGS...]: echo the command line with a prefix (the '-' line used
# "cron:", the '+' line uses "btrfsmaint:"), then execute it unless $dryrun
# is true. $dryrun is run as a command, so it must hold `true` or `false`.
e() {
- echo "cron: $*"
+ echo "btrfsmaint: $*"
if ! $dryrun; then
"$@"
fi
}
# check-idle: decide whether the desktop session counts as idle.
#
# '-' side (old): took the smallest xprintidle value over all logged-in users
# and set the global `idle` to false if it was under two hours, else true.
#
# '+' side (new): asks `xscreensaver-command -time` on display :0 and sets the
# global `locked`:
#   - command succeeds and output contains "non-blanked" -> locked=false
#   - command succeeds without "non-blanked"             -> locked=true
#   - command fails                                       -> locked=true
# NOTE(review): when xscreensaver-command is not installed the function
# returns 0 before `locked` is assigned, so callers see whatever value it had
# before (unset on the first call) - confirm that is intended.
check-idle() {
- type -p xprintidle &>/dev/null || return 0
+ type -p xscreensaver-command &>/dev/null || return 0
+ export XAUTHORITY=/home/iank/.Xauthority
export DISPLAY=:0
- # a hours, a movie could run that long.
- idle_limit=$((1000 * 60 * 60 * 2))
- idle_time=$idle_limit
- while read -r user; do
- new_idle_time=$(sudo -u $user xprintidle 2>/dev/null) ||:
- if [[ $new_idle_time && $new_idle_time -lt $idle_time ]]; then
- idle_time=$new_idle_time
+ locked=false
+ if lock_info=$(xscreensaver-command -time); then
+ if [[ $lock_info != *non-blanked* ]]; then
+ locked=true
fi
- done < <(users | tr " " "\n" | sort -u)
- if (( idle_time < idle_limit )); then
- idle=false
else
- idle=true
+ locked=true
fi
}
-
# usage [STATUS]: print the help text to stdout and exit with STATUS.
# NOTE(review): `exit $1` is unquoted, so a call without an argument (as the
# -h|--help case does) becomes a bare `exit`, which reuses the status of the
# preceding `cat`.
# NOTE(review): the help text still refers to xprintidle although the '+'
# side of check-idle uses xscreensaver-command, and "maintence" is misspelled.
usage() {
cat <<EOF
-Usage: ${0##*/} [ARGS]
-Do btrfs maintence or stop if xprintidle shows a user
+Usage: ${0##*/} [OPTIONS]
+Do btrfs maintence or stop if we have X and xprintidle shows a user
-force Run regardless of user idle status
-check Only check if an existing maintence should be cancelled due to
- nonidle user. Also, runs in a loop every 20 seconds for 10
+Normally, no options are needed.
+
+--check Only check if an existing maintence should be cancelled due to
+ nonidle user and run in a loop every 20 seconds for 10
minutes.
+--dryrun Just print out what we would do.
+
+--force Run regardless of user idle status on all disks and do scrub
+ regardless of when it was last run.
+--no-stats Avoid checking error statistics. Use this to avoid a rare race
+ condition when running --check concurrently with normal run.
+
+
+-h|--help Show help
+
Note: Uses util-linux getopt option parsing: spaces between args and
options, short options can be combined, options before args.
EOF
exit $1
}
# Command-line parsing.
# '-' side (old): looked only at $1 and accepted the bare words check, force
# and dryrun.
# '+' side (new): requires `getopt -T` to exit with status 4 (the script takes
# that as proof of the enhanced util-linux getopt), lets getopt normalize "$@"
# into $temp, re-installs it as the positional parameters with `eval set --`,
# and consumes options until the `--` separator. The resulting flags
# (check, dryrun, force, stats) hold `true`/`false` and are made readonly.
# NOTE(review): `usage` is called without an argument for -h|--help, so the
# exit status comes from usage's bare `exit`.
+##### begin command line parsing ########
+
+# ensure we can handle args with spaces or empty.
+ret=0; getopt -T || ret=$?
+[[ $ret == 4 ]] || { echo "Install util-linux for enhanced getopt" >&2; exit 1; }
-force=false
check=false
dryrun=false
-if [[ $1 ]]; then
+force=false
+stats=true
+
+temp=$(getopt -l help,check,dryrun,force,no-stats h "$@") || usage 1
+eval set -- "$temp"
+while true; do
case $1 in
- check)
- check=true
- ;;
- force)
- force=true
- ;;
- dryrun)
- dryrun=true
- ;;
- *)
- echo "$0: error: unexpected arg" >&2
- usage 1
- ;;
+ --check) check=true ;;
+ --dryrun) dryrun=true ;;
+ --force) force=true ;;
+ --no-stats) stats=false ;;
+ -h|--help) usage ;;
+ --) shift; break ;;
+ *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;;
esac
-fi
+ shift
+done
+readonly check dryrun force stats
+##### end command line parsing ########
# main: unless --force was given, call check-idle (and, for a normal run, keep
# re-checking once a minute until the screen is locked or max_min minutes have
# passed). Then, for each distinct btrfs source reported by findmnt, using its
# first mount point:
#   1. if $stats is true, record or compare `btrfs dev stats -c` output in
#      <mount>/btrfs-dev-stats and mail root when it changed;
#   2. with --check, cancel a running scrub when not locked, then move on;
#   3. otherwise skip the filesystem if the last scrub is recent (unless
#      --force), else start a scrub through e().
# Lines prefixed '-' / '+' are the old / new sides of the change.
# NOTE(review): no call to main is visible in this excerpt.
main() {
- idle=true
if ! $force; then
check-idle
+ if ! $check; then
+ min=0
+ max_min=300
+ # When the cron kicks in, we may not be idle (physically sleeping) yet, so
+ # wait.
+ while ! $locked && (( min < max_min )); do
+ min=$(( min + 1 ))
+ sleep 60
+ check-idle
+ done
+ # If we've waited a really long time for idle, just give up.
# NOTE(review): this also gives up when check-idle reported locked on the
# very last iteration, because min equals max_min in that case as well.
+ if (( min == max_min )); then
+ return
+ fi
+ fi
fi
- tmp=$(mktemp)
fnd="findmnt --types btrfs --noheading"
for x in $($fnd --output "SOURCE" --nofsroot | sort -u); do
mnt=$($fnd --output "TARGET" --first-only --source $x)
[[ $mnt ]] || continue
- if ! btrfs dev stats -c $mnt >$tmp; then
- if diff -q $mnt/btrfs-dev-stats $tmp; then
- diff -u $mnt/btrfs-dev-stats $tmp | mail -s "$HOSTNAME: error: btrfs dev stats -c $mnt" root@localhost
- cat $tmp >$mnt/btrfs-dev-stats
+ #### begin look for diff in stats, eg: increasing error count ####
+ if $stats; then
+ tmp=$(mktemp)
+ # ${mnt%/} so that if mnt is / we avoid making a buggy looking path
+ stats_path=${mnt%/}/btrfs-dev-stats
+ if [[ ! -e $stats_path ]]; then
+ btrfs dev stats -c $mnt >$stats_path ||: # populate initial reading
# The comparison below only runs when `btrfs dev stats -c` exits non-zero.
# NOTE(review): presumably -c makes btrfs exit non-zero when any counter is
# non-zero - confirm against btrfs-device(8).
+ elif ! btrfs dev stats -c $mnt >$tmp; then
+ if ! diff -q $stats_path $tmp; then
+ mv $stats_path $stats_path.1
+ cat $tmp >$stats_path
# NOTE(review): $stats_path was overwritten with the contents of $tmp on the
# line above, so this diff compares identical content and $diff comes out
# empty; the previous reading now lives in $stats_path.1.
+ diff=$(diff -u $stats_path $tmp 2>&1 ||:)
+ printf "diff of: btrfs dev stats -c %s\n%s\n" "$mnt" "$diff"
+ exim -odf -i root <<EOF
+From: root@$(hostname -f)
+To: root@$(hostname -f)
+Subject: btrfsmaint: device stats changed for $mnt
+
+diff of: btrfs dev stats -c $mnt
+$diff
+EOF
+ fi
fi
+ rm -f $tmp
fi
- rm -f $tmp
+ #### end look for diff in stats, eg: increasing error count ####
- if ! $idle; then
- if $dryrun; then
- echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
- else
- btrfs scrub cancel $mnt &>/dev/null ||:
- continue
- fi
- fi
if $check; then
+ if ! $locked; then
+ if $dryrun; then
+ echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
+ else
+ btrfs scrub cancel $mnt &>/dev/null ||:
+ fi
+ fi
continue
fi
# NOTE(review): the beginning of the command that the next three lines close
# is not visible in this excerpt; presumably it assigns $date from scrub
# status output - confirm against the full file.
sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p'
)
fi
- if [[ $date ]]; then
+ if ! $force && [[ $date ]]; then
if $dryrun; then
echo "$0: last scrub finish for $mnt: $date"
fi
date=$(date --date="$date" +%s)
- # if date is sooner than 90 days ago
+ # if date is sooner than 60 days ago
# the wiki recommends 30 days or so, but
- # it makes the comp lag like shit for a day,
- # so I'm going with 90 days.
- if (( date > EPOCHSECONDS - 60*60*24*30 )); then
+ # I'm going with 60 days.
+ if (( date > EPOCHSECONDS - 60*60*24*60 )); then
# NOTE(review): the dry-run message below still says "< 30 days" although
# the '+' condition above uses a 60 day window.
if $dryrun; then
echo "$0: skiping scrub of $mnt, last was $(( (EPOCHSECONDS - date) / 60/60/24 )) days ago, < 30 days"
fi
continue
fi
fi
- # -c 2 -n 4 is from btrfsmaintenance, does ionice
- e btrfs scrub start -Bd -c 2 -n 4 $mnt
+ # btrfsmaintenance does -c 2 -n 4, but I want lowest pri.
+ e btrfs scrub start -Bd -c 3 $mnt
+
+ # We normally only do one disk since this is meant to be run in
+ # downtime and if we try to do all disks, we invariably end up doing
+ # a scrub after downtime. So, just do one disk per day.
+ if ! $force; then
+ return 0
+ fi
done
}