various fixes
[distro-setup] / btrfsmaint
#!/bin/bash

# Unless already running as root, replace this process with a sudo
# invocation of the same script, keeping the environment (-E) and passing
# the original arguments through unchanged.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

# errexit is enabled only while the err library is sourced, so a failure
# while loading it aborts the script; it is switched off again right after.
set -e
. /usr/local/lib/err
set +e
7
# Inspired by:
# https://github.com/kdave/btrfsmaintenance
10
# Only define the failure-mail hook when INVOCATION_ID is non-empty
# (presumably meaning we were started by systemd -- confirm).
if [[ -n $INVOCATION_ID ]]; then
  # Mail root the last 50 journal lines of the btrfsmaint unit.
  # NOTE(review): presumably called by the sourced err library when a
  # command fails -- confirm against /usr/local/lib/err.
  err-cleanup() {
    local fqdn recent_log
    # "|| true" keeps a failing lookup from aborting the report; the mail is
    # then sent with that part left empty.
    fqdn=$(hostname -f) || true
    recent_log=$(journalctl -u btrfsmaint -n 50) || true
    printf '%s\n' \
      "From: root@$fqdn" \
      "To: root@$fqdn" \
      "Subject: btrfsmaint automatically exited on command error" \
      "" \
      "journalctl -u btrfsmaint -n 50:" \
      "$recent_log" \
      | exim -odf -i root
  }
fi
23
# Whitespace-separated usage values handed, one "btrfs balance start" run
# per value, to the -dusage= filter (dusage) and the -musage= filter (musage)
# in main.
dusage='5 10'
musage='5'
26
# Log a command line, then run it unless this is a dry run.
# Globals:   dryrun (read) - the string "true" or "false", run as a command
# Arguments: the command and its arguments
# Outputs:   "btrfsmaint: <command line>" on stdout, plus the command's output
# Returns:   0 on a dry run, otherwise the command's exit status
e() {
  printf '%s\n' "btrfsmaint: $*"
  if $dryrun; then
    return 0
  fi
  "$@"
}
33
# Decide whether the machine counts as idle and record it in "locked".
# Globals:   locked (written) - "false" only when xscreensaver-command -time
#            succeeds and its output contains "non-blanked"; "true" otherwise
#            XAUTHORITY, DISPLAY (exported) - so xscreensaver-command can
#            reach the X display
# Returns:   0
check-idle() {
  local lock_info
  # Without xscreensaver there is nothing to ask. Set the flag explicitly
  # instead of leaving it unset, so callers testing "$locked" do not depend
  # on an empty expansion happening to behave like "true".
  if ! type -p xscreensaver-command &>/dev/null; then
    locked=true
    return 0
  fi
  export XAUTHORITY=/home/iank/.Xauthority
  export DISPLAY=:0
  # Default to idle; a failing query is treated the same as a locked screen.
  locked=true
  if lock_info=$(xscreensaver-command -time) \
    && [[ $lock_info == *non-blanked* ]]; then
    locked=false
  fi
}
47
# Print the help text and exit.
# Arguments: $1 - exit status (optional, defaults to 0)
# Outputs:   help text on stdout when the status is 0, on stderr otherwise,
#            so that error-path help does not pollute stdout
# Returns:   never; exits the shell with the given status
usage() {
  local status=${1:-0}
  local fd=1
  if (( status != 0 )); then
    fd=2
  fi
  cat >&"$fd" <<EOF
Usage: ${0##*/} [OPTIONS]
Do btrfs maintenance, or wait/stop if we have X and xscreensaver shows a
non-blanked screen (user not idle)

Normally, no options are needed.

--check     Only check if an existing maintenance should be cancelled due to
            nonidle user, and keep rechecking in a loop every 60 seconds.

--dryrun    Just print out what we would do.

--force     Run regardless of user idle status on all disks and do scrub
            regardless of when it was last run.
--no-stats  Avoid checking error statistics. Use this to avoid a rare race
            condition when running --check concurrently with normal run.


-h|--help  Show help

Note: Uses util-linux getopt option parsing: spaces between args and
options, short options can be combined, options before args.
EOF
  exit "$status"
}
74
##### begin command line parsing ########

# The getopt used here must be the enhanced one, which exits with status 4
# for -T; that is the version that copes with empty arguments and arguments
# containing spaces.
getopt_status=0
getopt -T || getopt_status=$?
if [[ $getopt_status != 4 ]]; then
  echo "Install util-linux for enhanced getopt" >&2
  exit 1
fi

# Defaults; each is the string "true" or "false" and is later run as a
# command in conditionals.
check=false dryrun=false force=false
stats=true

if ! parsed=$(getopt -l help,check,dryrun,force,no-stats h "$@"); then
  usage 1
fi
eval set -- "$parsed"
# getopt's normalized output ends the options with "--"; consume everything
# up to it.
until [[ $1 == -- ]]; do
  case $1 in
    --check) check=true ;;
    --dryrun) dryrun=true ;;
    --force) force=true ;;
    --no-stats) stats=false ;;
    -h|--help) usage ;;
    *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;;
  esac
  shift
done
shift # drop the "--" separator itself
readonly check dryrun force stats
##### end command line parsing ########
102
103
# Run one pass over the mounted btrfs filesystems.
#
# Per filesystem (one entry per distinct findmnt SOURCE, using its first
# mount point):
#   1. unless --no-stats: compare "btrfs dev stats -c" against the reading
#      saved in <mountpoint>/btrfs-dev-stats and mail root when it changed;
#   2. with --check: cancel a running scrub if the user is not idle, then go
#      on to the next filesystem;
#   3. otherwise: balance, then scrub unless the last finished scrub is
#      newer than scrub_interval_days.
# Without --force, only the first filesystem that actually gets scrubbed is
# handled per invocation.
#
# Globals:   force, check, stats, dryrun (read) - "true"/"false" strings
#            locked (read; written by check-idle)
#            dusage, musage (read) - whitespace-separated usage values
# Arguments: none
# Returns:   0 when giving up waiting for idle or after a non-forced scrub;
#            otherwise the status of the last command run
main() {
  local min max_min src mnt tmp stats_path stats_diff fqdn pct
  local scrub_status last_scrub last_scrub_epoch
  local scrub_interval_days=60
  local -a fnd sources

  if ! $force; then
    check-idle
    if ! $check; then
      min=0
      max_min=300
      # When the cron kicks in, we may not be idle (physically sleeping) yet,
      # so wait.
      while ! $locked && (( min < max_min )); do
        min=$(( min + 1 ))
        sleep 60
        check-idle
      done
      # If we've waited a really long time for idle, just give up.
      if (( min == max_min )); then
        return
      fi
    fi
  fi

  fnd=(findmnt --types btrfs --noheading)
  mapfile -t sources < <("${fnd[@]}" --output SOURCE --nofsroot | sort -u)
  for src in "${sources[@]}"; do
    [[ $src ]] || continue
    mnt=$("${fnd[@]}" --output TARGET --first-only --source "$src")
    [[ $mnt ]] || continue

    #### begin look for diff in stats, eg: increasing error count ####
    if $stats; then
      # ${mnt%/} so that if mnt is / we avoid making a buggy looking path
      stats_path=${mnt%/}/btrfs-dev-stats
      if [[ ! -e $stats_path ]]; then
        btrfs dev stats -c "$mnt" >"$stats_path" ||: # populate initial reading
      else
        tmp=$(mktemp)
        # A non-zero exit from "dev stats -c" is the only case in which the
        # new reading is compared against the saved one.
        if ! btrfs dev stats -c "$mnt" >"$tmp"; then
          # Take the diff BEFORE the saved reading is replaced; diffing
          # afterwards compares the new reading with itself and is always
          # empty.
          if ! stats_diff=$(diff -u "$stats_path" "$tmp" 2>&1); then
            mv -- "$stats_path" "$stats_path.1"
            cat -- "$tmp" >"$stats_path"
            printf "diff of: btrfs dev stats -c %s\n%s\n" "$mnt" "$stats_diff"
            # A failed hostname lookup must not stop the report.
            fqdn=$(hostname -f) ||:
            exim -odf -i root <<EOF
From: root@$fqdn
To: root@$fqdn
Subject: btrfsmaint: device stats changed for $mnt

diff of: btrfs dev stats -c $mnt
$stats_diff
EOF
          fi
        fi
        rm -f -- "$tmp"
      fi
    fi
    #### end look for diff in stats, eg: increasing error count ####

    if $check; then
      if ! $locked; then
        if $dryrun; then
          echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
        else
          # Best effort: there may be no scrub running to cancel.
          btrfs scrub cancel "$mnt" &>/dev/null ||:
        fi
      fi
      continue
    fi

    # for comparing before and after balance.
    # the log is already fairly verbose, so commented.
    # e btrfs filesystem df $mnt
    # e df -H $mnt
    # $dusage / $musage are deliberately unquoted: they are
    # whitespace-separated lists.
    if btrfs filesystem df "$mnt" | grep -q "Data+Metadata"; then
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" -musage="$pct" "$mnt"
      done
    else
      e ionice -c 3 btrfs balance start -dusage=0 "$mnt"
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" "$mnt"
      done
      e ionice -c 3 btrfs balance start -musage=0 "$mnt"
      for pct in $musage; do
        e ionice -c 3 btrfs balance start -musage="$pct" "$mnt"
      done
    fi

    # Find when the last finished scrub started, if there was one.
    last_scrub=
    scrub_status=$(btrfs scrub status "$mnt")
    if grep -qi '^status:[[:space:]]*finished$' <<<"$scrub_status"; then
      last_scrub=$(
        sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p' <<<"$scrub_status"
      )
    fi
    if [[ ! $last_scrub ]]; then
      # output from older versions, at least btrfs v4.15.1
      last_scrub=$(
        sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' \
          <<<"$scrub_status"
      )
    fi
    if ! $force && [[ $last_scrub ]]; then
      if $dryrun; then
        echo "$0: last scrub finish for $mnt: $last_scrub"
      fi
      last_scrub_epoch=$(date --date="$last_scrub" +%s)
      # Skip if the last scrub is more recent than scrub_interval_days ago.
      # The wiki recommends 30 days or so, but I'm going with 60 days.
      if (( last_scrub_epoch > EPOCHSECONDS - scrub_interval_days*24*60*60 )); then
        if $dryrun; then
          echo "$0: skiping scrub of $mnt, last was $(( (EPOCHSECONDS - last_scrub_epoch) / 60/60/24 )) days ago, < $scrub_interval_days days"
        fi
        continue
      fi
    fi
    # btrfsmaintenance does -c 2 -n 4, but I want lowest pri.
    e btrfs scrub start -Bd -c 3 "$mnt"

    # We normally only do one disk since this is meant to be run in
    # downtime and if we try to do all disks, we invariably end up doing
    # a scrub after downtime. So, just do one disk per day.
    if ! $force; then
      return 0
    fi
  done
}
224
# Call main over and over, pausing 60 seconds after each pass.
# Never returns on its own; the loop has no exit condition.
loop-main() {
  for (( ;; )); do
    main
    sleep 60
  done
}
231
# Entry point: a single pass by default; with --check, keep running passes
# through loop-main.
if ! $check; then
  main
else
  loop-main
fi