host info updates
[distro-setup] / btrfsmaint
1 #!/bin/bash
2 # I, Ian Kelling, follow the GNU license recommendations at
3 # https://www.gnu.org/licenses/license-recommendations.en.html. They
4 # recommend that small programs, < 300 lines, be licensed under the
5 # Apache License 2.0. This file contains or is part of one or more small
6 # programs. If a small program grows beyond 300 lines, I plan to switch
7 # its license to GPL.
8
9 # Copyright 2024 Ian Kelling
10
11 # Licensed under the Apache License, Version 2.0 (the "License");
12 # you may not use this file except in compliance with the License.
13 # You may obtain a copy of the License at
14
15 # http://www.apache.org/licenses/LICENSE-2.0
16
17 # Unless required by applicable law or agreed to in writing, software
18 # distributed under the License is distributed on an "AS IS" BASIS,
19 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 # See the License for the specific language governing permissions and
21 # limitations under the License.
22
23
24
# Re-execute this script under sudo when not already running as root.
# -E keeps the caller's environment; "$@" forwards the original arguments.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

# Load the bash-bear library with errexit switched on only while sourcing
# it, then switch errexit back off for the rest of the script.
set -e
. /usr/local/lib/bash-bear
set +e
28
# inspired by
# https://github.com/kdave/btrfsmaintenance
31
# Only define the failure mailer when INVOCATION_ID is set (systemd sets it
# for the units it starts), i.e. not for interactive runs.
if [[ -n $INVOCATION_ID ]]; then
  # Mail root the last 50 journal lines of the btrfsmaint unit.
  # NOTE(review): presumably called by the error handling that
  # /usr/local/lib/bash-bear installs -- confirm against that library.
  err-cleanup() {
    printf '%s\n' \
      "From: root@$(hostname -f)" \
      "To: root@$(hostname -f)" \
      'Subject: btrfsmaint automatically exited on command error' \
      '' \
      'journalctl -u btrfsmaint -n 50:' \
      "$(journalctl -u btrfsmaint -n 50)" \
      | exim -odf -i root
  }
fi
44
# Usage-percentage thresholds handed to `btrfs balance start` as -dusage=N
# and -musage=N. They stay space-separated strings because the balance loops
# word-split them.
dusage='5 10'
musage='5'
47
# Log a command line, then run it unless this is a dry run.
# Globals:   dryrun (read) - the command name "true" or "false"
# Arguments: the command to run, followed by its arguments
# Outputs:   "btrfsmaint: <command line>" on stdout, then whatever the
#            command itself prints
# Returns:   0 on a dry run, otherwise the exit status of the command
e() {
  echo "btrfsmaint: $*"
  # Dry run: the log line above is all we do.
  $dryrun && return 0
  "$@"
}
54
# Decide whether the desktop user is idle, based on xscreensaver's state.
# Globals:
#   locked (written) - "false" only when xscreensaver-command -time succeeds
#     and its output contains "non-blanked"; "true" in every other case,
#     including when xscreensaver-command is not installed.
#   XAUTHORITY, DISPLAY (exported) - pointed at iank's X session on :0 so
#     xscreensaver-command can be queried from outside that session.
# Returns: 0 always.
check-idle() {
  local lock_info

  if ! type -p xscreensaver-command &>/dev/null; then
    # No xscreensaver on this host. Set the flag explicitly instead of
    # leaving it unset: callers run `! $locked`, which only behaved like
    # "locked" for an unset variable by accident of empty-command semantics.
    locked=true
    return 0
  fi

  export XAUTHORITY=/home/iank/.Xauthority
  export DISPLAY=:0

  # Treat a failing query the same as a locked screen; only an explicit
  # "non-blanked" answer counts as an active user.
  locked=true
  if lock_info=$(xscreensaver-command -time) \
    && [[ $lock_info == *non-blanked* ]]; then
    locked=false
  fi
}
68
# Print the help text to stdout and exit.
# Arguments: $1 - exit status to use (optional, defaults to 0)
# The text names xscreensaver because that is the tool check-idle queries,
# and describes --check as a 60 second loop because that is what loop-main
# does.
usage() {
  cat <<EOF
Usage: ${0##*/} [OPTIONS]
Do btrfs maintenance, or stop it if we have X and xscreensaver shows an
active user.

Normally, no options are needed.

--check     Only check if an existing maintenance should be cancelled due to
            a non-idle user, and run in a loop every 60 seconds until
            stopped.

--dryrun    Just print out what we would do.

--force     Run regardless of user idle status on all disks and do scrub
            regardless of when it was last run.

--no-stats  Avoid checking error statistics. Use this to avoid a rare race
            condition when running --check concurrently with normal run.

-h|--help   Show help

Note: Uses util-linux getopt option parsing: spaces between args and
options, short options can be combined, options before args.
EOF
  exit "${1:-0}"
}
95
##### begin command line parsing ########

# `getopt -T` exits with status 4 only for the enhanced (util-linux) getopt,
# the variant that can handle empty arguments and arguments with spaces.
# The status is probed inside `if` so a non-zero exit is not treated as a
# command failure.
if getopt -T; then
  ret=0
else
  ret=$?
fi
if [[ $ret != 4 ]]; then
  echo "Install util-linux for enhanced getopt" >&2
  exit 1
fi

# Option flags. Each one holds the command name "true" or "false" so it can
# be executed directly as a condition (e.g. `if $check; then`).
check=false
dryrun=false
force=false
stats=true

# Normalize the arguments; "h" is the short-option string.
if ! temp=$(getopt -l help,check,dryrun,force,no-stats h "$@"); then
  usage 1
fi
eval set -- "$temp"

while :; do
  case $1 in
    --check)
      check=true
      ;;
    --dryrun)
      dryrun=true
      ;;
    --force)
      force=true
      ;;
    --no-stats)
      stats=false
      ;;
    -h | --help)
      usage
      ;;
    --)
      # End of options.
      shift
      break
      ;;
    *)
      echo "$0: unexpected args: $*" >&2
      usage 1
      ;;
  esac
  shift
done
readonly check dryrun force stats
##### end command line parsing ########
123
124
# Run one maintenance pass over the mounted btrfs filesystems.
#
# Unless --force was given, first consult check-idle; for a normal
# (non --check) run, poll once a minute for up to 300 minutes until the user
# is idle, and give up without doing anything if that limit is reached.
#
# Then, for each distinct btrfs source device reported by findmnt:
#   1. Unless --no-stats: when `btrfs dev stats -c` reports a non-zero
#      counter and the output differs from the reading saved in
#      <mountpoint>/btrfs-dev-stats, print the diff, mail it to root, keep
#      the old reading as btrfs-dev-stats.1 and save the new one.
#   2. With --check: cancel a running scrub if the user is active, then move
#      on to the next filesystem (no balance, no scrub).
#   3. Run the balance passes using the dusage/musage thresholds.
#   4. Scrub, unless (without --force) a scrub finished within the last
#      60 days. Without --force, return after the first scrub so that only
#      one filesystem is scrubbed per run.
#
# Globals read: check dryrun force stats dusage musage
#               locked (written by check-idle)
# Returns: 0 when giving up on idle or after a non-forced scrub, 1 if no
#          temp file could be created, otherwise the status of the last
#          command run.
main() {
  local min max_min=300
  local -a fnd sources
  local src mnt tmp stats_path stats_diff pct scrub_status last_scrub
  local -r scrub_interval_days=60

  if ! $force; then
    check-idle
    if ! $check; then
      min=0
      # When the cron kicks in, we may not be idle (physically sleeping)
      # yet, so wait.
      while ! $locked && (( min < max_min )); do
        min=$(( min + 1 ))
        sleep 60
        check-idle
      done
      # If we've waited a really long time for idle, just give up.
      if (( min == max_min )); then
        return 0
      fi
    fi
  fi

  fnd=(findmnt --types btrfs --noheading)
  mapfile -t sources < <("${fnd[@]}" --output SOURCE --nofsroot | sort -u)
  for src in "${sources[@]}"; do
    [[ $src ]] || continue
    mnt=$("${fnd[@]}" --output TARGET --first-only --source "$src")
    [[ $mnt ]] || continue

    #### begin look for diff in stats, eg: increasing error count ####
    if $stats; then
      tmp=$(mktemp) || {
        echo "$0: mktemp failed" >&2
        return 1
      }
      # ${mnt%/} so that if mnt is / we avoid making a buggy looking path
      stats_path=${mnt%/}/btrfs-dev-stats
      if [[ ! -e $stats_path ]]; then
        btrfs dev stats -c "$mnt" >"$stats_path" ||: # populate initial reading
      elif ! btrfs dev stats -c "$mnt" >"$tmp"; then
        # -c makes btrfs exit non-zero when any counter is non-zero, so we
        # only get here when there is at least one error recorded.
        if ! diff -q "$stats_path" "$tmp"; then
          # Take the diff BEFORE rotating the files: once the saved reading
          # has been overwritten with the new one there is nothing left to
          # compare and the report would always be empty.
          stats_diff=$(diff -u "$stats_path" "$tmp" 2>&1 ||:)
          mv "$stats_path" "$stats_path.1"
          cat "$tmp" >"$stats_path"
          printf "diff of: btrfs dev stats -c %s\n%s\n" "$mnt" "$stats_diff"
          exim -odf -i root <<EOF
From: root@$(hostname -f)
To: root@$(hostname -f)
Subject: btrfsmaint: device stats changed for $mnt

diff of: btrfs dev stats -c $mnt
$stats_diff
EOF
        fi
      fi
      rm -f "$tmp"
    fi
    #### end look for diff in stats, eg: increasing error count ####

    if $check; then
      if ! $locked; then
        if $dryrun; then
          echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
        else
          # Best effort: there may be no scrub running to cancel.
          btrfs scrub cancel "$mnt" &>/dev/null ||:
        fi
      fi
      continue
    fi

    # for comparing before and after balance.
    # the log is already fairly verbose, so commented.
    # e btrfs filesystem df $mnt
    # e df -H $mnt
    if btrfs filesystem df "$mnt" | grep -q "Data+Metadata"; then
      # Mixed data+metadata block groups: balance both in one pass.
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" -musage="$pct" "$mnt"
      done
    else
      e ionice -c 3 btrfs balance start -dusage=0 "$mnt"
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" "$mnt"
      done
      e ionice -c 3 btrfs balance start -musage=0 "$mnt"
      for pct in $musage; do
        e ionice -c 3 btrfs balance start -musage="$pct" "$mnt"
      done
    fi

    # Work out when the last finished scrub was started.
    last_scrub=
    scrub_status=$(btrfs scrub status "$mnt")
    if grep -qi '^status:[[:space:]]*finished$' <<<"$scrub_status"; then
      last_scrub=$(
        sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p' <<<"$scrub_status"
      )
    fi
    if [[ ! $last_scrub ]]; then
      # output from older versions, at least btrfs v4.15.1
      last_scrub=$(
        sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' \
          <<<"$scrub_status"
      )
    fi
    if ! $force && [[ $last_scrub ]]; then
      if $dryrun; then
        echo "$0: last scrub finish for $mnt: $last_scrub"
      fi
      last_scrub=$(date --date="$last_scrub" +%s)
      # Skip if the last scrub is more recent than the interval. The wiki
      # recommends 30 days or so, but I'm going with 60 days.
      if (( last_scrub > EPOCHSECONDS - scrub_interval_days*24*60*60 )); then
        if $dryrun; then
          echo "$0: skipping scrub of $mnt, last was $(( (EPOCHSECONDS - last_scrub) / 60/60/24 )) days ago, < $scrub_interval_days days"
        fi
        continue
      fi
    fi
    # btrfsmaintenance does -c 2 -n 4, but I want lowest pri.
    e btrfs scrub start -Bd -c 3 "$mnt"

    # We normally only do one disk since this is meant to be run in
    # downtime and if we try to do all disks, we invariably end up doing
    # a scrub after downtime. So, just do one disk per day.
    if ! $force; then
      return 0
    fi
  done
}
245
# Call main over and over, sleeping 60 seconds after each pass. Never
# returns on its own; the loop only ends when the process is stopped.
loop-main() {
  for (( ; ; )); do
    main
    sleep 60
  done
}
252
# Entry point: a plain run does a single maintenance pass, while --check
# keeps polling through loop-main.
if ! $check; then
  main
else
  loop-main
fi