f50f5494e2aba068fb68aa2bd51f748b87d358a7
[distro-setup] / btrfsmaint
1 #!/bin/bash
2 # I, Ian Kelling, follow the GNU license recommendations at
3 # https://www.gnu.org/licenses/license-recommendations.en.html. They
4 # recommend that small programs, < 300 lines, be licensed under the
5 # Apache License 2.0. This file contains or is part of one or more small
6 # programs. If a small program grows beyond 300 lines, I plan to switch
7 # its license to GPL.
8
9 # Copyright 2024 Ian Kelling
10
11 # Licensed under the Apache License, Version 2.0 (the "License");
12 # you may not use this file except in compliance with the License.
13 # You may obtain a copy of the License at
14
15 # http://www.apache.org/licenses/LICENSE-2.0
16
17 # Unless required by applicable law or agreed to in writing, software
18 # distributed under the License is distributed on an "AS IS" BASIS,
19 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 # See the License for the specific language governing permissions and
21 # limitations under the License.
22
23
24
# Re-execute this script through sudo (keeping the caller's environment)
# unless we are already running as root.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

# errexit is enabled only while the helper library is being sourced.
# NOTE(review): presumably bash-bear installs the error handling that the
# optional err-cleanup hook relies on -- confirm in the library itself.
set -e
. /usr/local/lib/bash-bear
set +e
28
29 # inspired from
30 # https://github.com/kdave/btrfsmaintenance
31
# Only when INVOCATION_ID is non-empty (systemd sets it for service
# processes): define err-cleanup, which mails root the last 50 journal lines
# of the btrfsmaint unit.
# NOTE(review): err-cleanup is never called in this file; presumably
# bash-bear invokes it on command error -- confirm.
if [[ -n $INVOCATION_ID ]]; then
  err-cleanup() {
    # The substitutions are deliberately kept inside the heredoc so that a
    # failing hostname/journalctl does not abort the notification itself.
    exim -odf -i root <<EOF
From: root@$(hostname -f)
To: root@$(hostname -f)
Subject: btrfsmaint automatically exited on command error

journalctl -u btrfsmaint -n 50:
$(journalctl -u btrfsmaint -n 50)
EOF
  }
fi
44
# Space-separated usage thresholds that main() passes, one per invocation,
# to `btrfs balance start` as -dusage= (data) and -musage= (metadata).
dusage='5 10'
musage='5'
47
# Log a command line and then run it, unless this is a dry run.
# Globals:   dryrun (read) - holds the command name `true` or `false`
# Arguments: the command and its arguments
# Outputs:   "btrfsmaint: <command line>" on stdout, then the command's own
#            output when it is actually run
# Returns:   0 on a dry run, otherwise the command's exit status
e() {
  printf '%s\n' "btrfsmaint: $*"
  if $dryrun; then
    return 0
  fi
  "$@"
}
54
# Decide whether the desktop user is idle and record it in `locked`.
# Globals:   locked (written) - `true` when idle/unknown, `false` when active
#            XAUTHORITY, DISPLAY (exported) - hard-coded to iank's X session
# Outputs:   nothing
# Returns:   always 0
check-idle() {
  local lock_info
  if ! type -p xscreensaver-command &>/dev/null; then
    # No xscreensaver on this machine: there is no user activity to respect.
    # Set the flag explicitly rather than leaving it unset, which callers
    # only tolerated because an empty `$locked` expands to a no-op command.
    locked=true
    return 0
  fi
  export XAUTHORITY=/home/iank/.Xauthority
  export DISPLAY=:0
  locked=false
  # Treat any state other than "non-blanked" as idle; if the query itself
  # fails we stay on the conservative `false`.
  if lock_info=$(xscreensaver-command -time 2>/dev/null); then
    if [[ $lock_info != *non-blanked* ]]; then
      locked=true
    fi
  fi
}
66
# Print the help text and exit.
# Arguments: $1 - exit status (optional, defaults to 0)
# Outputs:   help text on stdout for status 0, on stderr otherwise
# Returns:   never; exits the shell with the given status
usage() {
  local -r status=${1:-0}
  if [[ $status != 0 ]]; then
    # Usage shown because of an error is a diagnostic, not program output.
    exec >&2
  fi
  cat <<EOF
Usage: ${0##*/} [OPTIONS]
Do btrfs maintenance or stop if we have X and xscreensaver-command shows a user

Normally, no options are needed.

--check     Only check if an existing maintenance should be cancelled due to
            nonidle user and run in a loop every 60 seconds.

--dryrun    Just print out what we would do.

--force     Run regardless of user idle status on all disks and do scrub
            regardless of when it was last run.

--no-stats  Avoid checking error statistics. Use this to avoid a rare race
            condition when running --check concurrently with normal run.

-h|--help   Show help

Note: Uses util-linux getopt option parsing: spaces between args and
options, short options can be combined, options before args.
EOF
  exit "$status"
}
93
##### begin command line parsing ########

# `getopt -T` exits with status 4 only for the enhanced (util-linux)
# implementation, which is the one that copes with spaces and empty args.
ret=0
getopt -T || ret=$?
if [[ $ret != 4 ]]; then
  echo "Install util-linux for enhanced getopt" >&2
  exit 1
fi

# Option flags hold the command names `true`/`false` so they can be used
# directly as conditions.
check=false
dryrun=false
force=false
stats=true

if ! temp=$(getopt -l help,check,dryrun,force,no-stats h "$@"); then
  usage 1
fi
eval set -- "$temp"
# getopt always terminates the option list with `--`.
until [[ $1 == -- ]]; do
  case $1 in
    --check) check=true ;;
    --dryrun) dryrun=true ;;
    --force) force=true ;;
    --no-stats) stats=false ;;
    -h|--help) usage ;;
    *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;;
  esac
  shift
done
shift
readonly check dryrun force stats
##### end command line parsing ########
121
122
# Run one maintenance pass over the mounted btrfs filesystems.
#
# For each distinct btrfs source device (using its first mount point):
#   1. unless stats=false, compare `btrfs dev stats -c` with the copy saved
#      in <mnt>/btrfs-dev-stats and mail root a diff when it changed;
#   2. in --check mode, cancel a running scrub if the user is not idle and
#      go on to the next filesystem;
#   3. otherwise balance, then scrub unless a finished scrub was started
#      within the last scrub_interval_days days.
# Without --force the function first waits (up to max_min minutes) for the
# user to become idle and stops after the first filesystem it scrubbed.
#
# Globals:   force, check, dryrun, stats (read) - `true`/`false` flags
#            dusage, musage (read)             - balance usage thresholds
#            locked (read)                     - set by check-idle
# Outputs:   progress/dry-run messages on stdout; mail to root via exim
# Returns:   0 when it gives up waiting for idle or after a scrub in
#            non-force mode; otherwise the status of the last command run
main() {
  local -a fnd=(findmnt --types btrfs --noheading)
  local -r scrub_interval_days=60
  local x mnt tmp stats_path stats_diff scrub_status last_scrub last_epoch
  local pct min max_min

  if ! $force; then
    check-idle
    if ! $check; then
      min=0
      max_min=300
      # When the cron kicks in, we may not be idle (physically sleeping)
      # yet, so wait.
      while ! $locked && (( min < max_min )); do
        min=$(( min + 1 ))
        sleep 60
        check-idle
      done
      # If we've waited a really long time and are still not idle, give up.
      # Testing the idle flag (not the counter) means that becoming idle on
      # the very last poll still lets the maintenance run.
      if ! $locked; then
        return 0
      fi
    fi
  fi

  # Word splitting is intended here: one source device per word.
  for x in $("${fnd[@]}" --output "SOURCE" --nofsroot | sort -u); do
    mnt=$("${fnd[@]}" --output "TARGET" --first-only --source "$x")
    [[ $mnt ]] || continue

    #### begin look for diff in stats, eg: increasing error count ####
    if $stats; then
      tmp=$(mktemp)
      # ${mnt%/} so that if mnt is / we avoid making a buggy looking path
      stats_path=${mnt%/}/btrfs-dev-stats
      if [[ ! -e $stats_path ]]; then
        btrfs dev stats -c "$mnt" >"$stats_path" ||: # populate initial reading
      elif ! btrfs dev stats -c "$mnt" >"$tmp"; then
        # -c makes btrfs exit non-zero when any counter is non-zero, so we
        # only get here when there are errors to compare.
        if ! diff -q "$stats_path" "$tmp"; then
          # Take the diff BEFORE rotating the files; afterwards the saved
          # copy equals $tmp and the diff would always be empty.
          stats_diff=$(diff -u "$stats_path" "$tmp" 2>&1 ||:)
          mv "$stats_path" "$stats_path.1"
          cat "$tmp" >"$stats_path"
          printf "diff of: btrfs dev stats -c %s\n%s\n" "$mnt" "$stats_diff"
          exim -odf -i root <<EOF
From: root@$(hostname -f)
To: root@$(hostname -f)
Subject: btrfsmaint: device stats changed for $mnt

diff of: btrfs dev stats -c $mnt
$stats_diff
EOF
        fi
      fi
      rm -f "$tmp"
    fi
    #### end look for diff in stats, eg: increasing error count ####

    if $check; then
      if ! $locked; then
        if $dryrun; then
          echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
        else
          btrfs scrub cancel "$mnt" &>/dev/null ||:
        fi
      fi
      continue
    fi

    # for comparing before and after balance.
    # the log is already fairly verbose, so commented.
    # e btrfs filesystem df $mnt
    # e df -H $mnt
    if btrfs filesystem df "$mnt" | grep -q "Data+Metadata"; then
      # Mixed block groups: data and metadata must be balanced together.
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" -musage="$pct" "$mnt"
      done
    else
      e ionice -c 3 btrfs balance start -dusage=0 "$mnt"
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" "$mnt"
      done
      e ionice -c 3 btrfs balance start -musage=0 "$mnt"
      for pct in $musage; do
        e ionice -c 3 btrfs balance start -musage="$pct" "$mnt"
      done
    fi

    # Find when the last finished scrub was started.
    last_scrub=
    scrub_status=$(btrfs scrub status "$mnt")
    if grep -i '^status:[[:space:]]*finished$' <<<"$scrub_status" &>/dev/null; then
      last_scrub=$(sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p' <<<"$scrub_status")
    fi
    if [[ ! $last_scrub ]]; then
      # output from older versions, at least btrfs v4.15.1
      last_scrub=$(
        sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' <<<"$scrub_status"
      )
    fi
    if ! $force && [[ $last_scrub ]]; then
      if $dryrun; then
        echo "$0: last scrub finish for $mnt: $last_scrub"
      fi
      last_epoch=$(date --date="$last_scrub" +%s)
      # Skip if the last scrub is more recent than the interval. The wiki
      # recommends 30 days or so, but I'm going with 60 days.
      if (( last_epoch > EPOCHSECONDS - scrub_interval_days*60*60*24 )); then
        if $dryrun; then
          echo "$0: skiping scrub of $mnt, last was $(( (EPOCHSECONDS - last_epoch) / 60/60/24 )) days ago, < $scrub_interval_days days"
        fi
        continue
      fi
    fi
    # btrfsmaintenance does -c 2 -n 4, but I want lowest pri.
    e btrfs scrub start -Bd -c 3 "$mnt"

    # We normally only do one disk since this is meant to be run in
    # downtime and if we try to do all disks, we invariably end up doing
    # a scrub after downtime. So, just do one disk per day.
    if ! $force; then
      return 0
    fi
  done
}
243
# Call main forever, pausing 60 seconds after each pass. Never returns on
# its own; the process has to be terminated from outside.
loop-main() {
  for (( ; ; )); do
    main
    sleep 60
  done
}
250
# Entry point: a single maintenance pass normally, or the endless
# cancel-if-not-idle polling loop when --check was given.
if ! $check; then
  main
else
  loop-main
fi