lots o fixes, beets, shellcheck stuff
[distro-setup] / btrfsmaint
1 #!/bin/bash
2
3
# Everything below needs root: when started unprivileged, replace this
# process with a sudo invocation of the same script, keeping the caller's
# environment (-E) and the original arguments.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

# errexit is on only while the err library is being sourced, then turned
# back off.
# NOTE(review): presumably this library installs the error handler that
# calls err-cleanup when it is defined -- confirm against /usr/local/lib/err.
set -e
. /usr/local/lib/err
set +e
7
# Inspired by
# https://github.com/kdave/btrfsmaintenance
10
# Only define the failure hook when INVOCATION_ID is set.
# NOTE(review): systemd sets INVOCATION_ID for units it starts, so this is
# presumably "running as the btrfsmaint service" -- confirm.
if [[ -n $INVOCATION_ID ]]; then
  # Mail root the last 50 journal lines of the btrfsmaint unit.
  # The heredoc is the complete message (headers, blank line, body), so its
  # lines must stay at column 0.
  function err-cleanup {
    exim -odf -i root <<EOF
From: root@$(hostname -f)
To: root@$(hostname -f)
Subject: btrfsmaint automatically exited on command error

journalctl -u btrfsmaint -n 50:
$(journalctl -u btrfsmaint -n 50)
EOF
  }
fi
23
# Space-separated usage percentages for "btrfs balance start", tried in the
# order listed. dusage feeds -dusage= (and also -musage= on filesystems
# reporting "Data+Metadata"); musage feeds -musage= otherwise.
dusage='5 10'
musage=5
26
#######################################
# Log a command line, then run it unless this is a dry run.
# Globals:   dryrun (read) - "true" or "false", executed as a command
# Arguments: the command and its arguments
# Outputs:   "btrfsmaint: <command line>" on stdout, then the command's own
#            output when it is actually run
# Returns:   0 on a dry run, otherwise the command's exit status
#######################################
e() {
  printf '%s\n' "btrfsmaint: $*"
  if $dryrun; then
    return 0
  fi
  "$@"
}
33
#######################################
# Decide whether every logged-in user has been idle long enough.
#
# Runs xprintidle as each distinct user listed by users(1) against display
# :0 and keeps the smallest value reported. If any value is below the limit
# the machine counts as in use.
#
# Globals:   idle (written: "true" or "false"); DISPLAY (exported as :0)
# Arguments: none
# Returns:   0. When xprintidle is not installed, returns immediately and
#            leaves idle untouched, so the caller's default stays in effect.
#######################################
check-idle() {
  # 2 hours, a movie could run that long. The arithmetic is two hours if
  # xprintidle reports milliseconds.
  local -r idle_limit=$((1000 * 60 * 60 * 2))
  local idle_time=$idle_limit
  local new_idle_time user

  type -p xprintidle &>/dev/null || return 0
  export DISPLAY=:0

  while read -r user; do
    # users(1) prints an empty line when nobody is logged in.
    [[ $user ]] || continue
    # A user without access to the display makes xprintidle fail; that is
    # expected, so ignore the failure and treat the output as "no reading".
    new_idle_time=$(sudo -u "$user" xprintidle 2>/dev/null) ||:
    # Only trust plain non-negative integers.
    [[ $new_idle_time =~ ^[0-9]+$ ]] || continue
    if [[ $new_idle_time -lt $idle_time ]]; then
      idle_time=$new_idle_time
    fi
  done < <(users | tr " " "\n" | sort -u)

  if (( idle_time < idle_limit )); then
    idle=false
  else
    idle=true
  fi
}
52
#######################################
# Print the help text and exit.
# Arguments: $1 - exit status (optional, defaults to 0)
# Outputs:   help text on stdout
# Returns:   never; exits the script
#######################################
usage() {
  # NOTE(review): the --check text says "every 20 seconds for 10 minutes",
  # but loop-main in this file sleeps 60 seconds between passes and has no
  # end condition -- confirm which one is intended.
  cat <<EOF
Usage: ${0##*/} [OPTIONS]
Do btrfs maintenance or stop if we have X and xprintidle shows a user

Normally, no options are needed.

--check     Only check if an existing maintenance should be cancelled due to
            nonidle user and run in a loop every 20 seconds for 10
            minutes.

--dryrun    Just print out what we would do.

--force     Run regardless of user idle status on all disks and do scrub
            regardless of when it was last run.

--no-stats  Avoid checking error statistics. Use this to avoid a rare race
            condition when running --check concurrently with normal run.

-h|--help   Show help

Note: Uses util-linux getopt option parsing: spaces between args and
options, short options can be combined, options before args.
EOF
  # Explicit default so a call without arguments does not depend on an
  # unquoted empty expansion.
  exit "${1:-0}"
}
79
##### begin command line parsing ########

# Ensure we can handle args with spaces or empty: the check below only
# accepts a getopt whose "-T" self-test exits with status 4.
# The status is captured through "||" so the non-zero exit is handled here.
ret=0
getopt -T || ret=$?
if [[ $ret != 4 ]]; then
  echo "Install util-linux for enhanced getopt" >&2
  exit 1
fi

# Defaults; each one is flipped by the matching option below.
check=false
dryrun=false
force=false
stats=true

temp=$(getopt -l help,check,dryrun,force,no-stats h "$@") || usage 1
eval set -- "$temp"
# Consume options up to the "--" separator found in getopt's output.
until [[ $1 == -- ]]; do
  case $1 in
    --check) check=true ;;
    --dryrun) dryrun=true ;;
    --force) force=true ;;
    --no-stats) stats=false ;;
    -h|--help) usage ;;
    *) echo "$0: unexpected args: $*" >&2 ; usage 1 ;;
  esac
  shift
done
# Drop the "--" itself.
shift
readonly check dryrun force stats
##### end command line parsing ########
107
108
#######################################
# Run one maintenance pass over the mounted btrfs filesystems.
#
# For each distinct btrfs source device reported by findmnt:
#   1. Unless --no-stats: compare "btrfs dev stats -c" with the reading
#      stored in <mountpoint>/btrfs-dev-stats. On a change, rotate the old
#      reading to btrfs-dev-stats.1, store the new one, and print and mail
#      the diff to root.
#   2. With --check: cancel a running scrub if a user is not idle, then move
#      on to the next filesystem.
#   3. Otherwise: balance, then scrub unless the last finished scrub is more
#      recent than scrub_interval_days.
#
# Globals:   check, dryrun, force, stats (read); dusage, musage (read);
#            idle (written here and by check-idle)
# Arguments: none
# Outputs:   progress and diagnostics on stdout; mail through exim
# Returns:   0 after giving up waiting for idle, or after the first scrub
#            when --force is not set; otherwise the status of the last
#            command run
#######################################
main() {
  local -r max_wait_min=300 scrub_interval_days=60
  # The command is kept as an array so its words are never re-split.
  local -a fnd=(findmnt --types btrfs --noheading)
  local -a devs=()
  local waited_min dev mnt tmp stats_path stats_diff pct
  local scrub_status last_scrub last_scrub_epoch

  idle=true
  if ! $force; then
    check-idle
    if ! $check; then
      waited_min=0
      # When the cron kicks in, we may not be idle (physically sleeping) yet,
      # so wait.
      while ! $idle && (( waited_min < max_wait_min )); do
        waited_min=$(( waited_min + 1 ))
        sleep 60
        check-idle
      done
      # If we've waited a really long time for idle, just give up.
      if (( waited_min == max_wait_min )); then
        return 0
      fi
    fi
  fi

  # Read the device list up front: commands inside the loop must not share
  # stdin with the list, and one array element per line avoids word
  # splitting and globbing.
  mapfile -t devs < <("${fnd[@]}" --output SOURCE --nofsroot | sort -u)

  for dev in "${devs[@]}"; do
    mnt=$("${fnd[@]}" --output TARGET --first-only --source "$dev")
    [[ $mnt ]] || continue

    #### begin look for diff in stats, eg: increasing error count ####
    if $stats; then
      tmp=$(mktemp)
      # ${mnt%/} so that if mnt is / we avoid making a buggy looking path
      stats_path=${mnt%/}/btrfs-dev-stats
      if [[ ! -e $stats_path ]]; then
        # Populate the initial reading. A non-zero status is ignored here
        # (per btrfs-device(8), -c exits non-zero when any counter is
        # non-zero).
        btrfs dev stats -c "$mnt" >"$stats_path" ||:
      elif ! btrfs dev stats -c "$mnt" >"$tmp"; then
        if ! diff -q "$stats_path" "$tmp"; then
          mv "$stats_path" "$stats_path.1"
          cat "$tmp" >"$stats_path"
          # Compare the rotated old reading with the new one. Diffing the
          # freshly written file against $tmp would always be empty.
          stats_diff=$(diff -u "$stats_path.1" "$stats_path" 2>&1 ||:)
          printf "diff of: btrfs dev stats -c %s\n%s\n" "$mnt" "$stats_diff"
          exim -odf -i root <<EOF
From: root@$(hostname -f)
To: root@$(hostname -f)
Subject: btrfsmaint: device stats changed for $mnt

diff of: btrfs dev stats -c $mnt
$stats_diff
EOF
        fi
      fi
      rm -f "$tmp"
    fi
    #### end look for diff in stats, eg: increasing error count ####

    if $check; then
      if ! $idle; then
        if $dryrun; then
          echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
        else
          # Fails when no scrub is running; that is fine.
          btrfs scrub cancel "$mnt" &>/dev/null ||:
        fi
      fi
      continue
    fi

    # for comparing before and after balance.
    # the log is already fairly verbose, so commented.
    # e btrfs filesystem df "$mnt"
    # e df -H "$mnt"
    if btrfs filesystem df "$mnt" | grep -q "Data+Metadata"; then
      # dusage/musage are space-separated lists; splitting is intended.
      # shellcheck disable=SC2086
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start "-dusage=$pct" "-musage=$pct" "$mnt"
      done
    else
      e ionice -c 3 btrfs balance start -dusage=0 "$mnt"
      # shellcheck disable=SC2086
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start "-dusage=$pct" "$mnt"
      done
      e ionice -c 3 btrfs balance start -musage=0 "$mnt"
      # shellcheck disable=SC2086
      for pct in $musage; do
        e ionice -c 3 btrfs balance start "-musage=$pct" "$mnt"
      done
    fi

    last_scrub=
    scrub_status=$(btrfs scrub status "$mnt")
    if printf "%s\n" "$scrub_status" \
      | grep -i '^status:[[:space:]]*finished$' &>/dev/null; then
      last_scrub=$(
        printf "%s\n" "$scrub_status" \
          | sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p'
      )
    fi
    if [[ ! $last_scrub ]]; then
      # output from older versions, at least btrfs v4.15.1
      last_scrub=$(
        printf "%s\n" "$scrub_status" \
          | sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p'
      )
    fi
    if ! $force && [[ $last_scrub ]]; then
      if $dryrun; then
        echo "$0: last scrub finish for $mnt: $last_scrub"
      fi
      last_scrub_epoch=$(date --date="$last_scrub" +%s)
      # Skip if the last scrub is more recent than scrub_interval_days.
      # The wiki recommends 30 days or so, but I'm going with 60 days.
      if (( last_scrub_epoch > EPOCHSECONDS - scrub_interval_days*24*60*60 )); then
        if $dryrun; then
          echo "$0: skipping scrub of $mnt, last was $(( (EPOCHSECONDS - last_scrub_epoch) / 60/60/24 )) days ago, < $scrub_interval_days days"
        fi
        continue
      fi
    fi
    # btrfsmaintenance does -c 2 -n 4, but I want lowest pri.
    e btrfs scrub start -Bd -c 3 "$mnt"

    # We normally only do one disk since this is meant to be run in
    # downtime and if we try to do all disks, we invariably end up doing
    # a scrub after downtime. So, just do one disk per day.
    if ! $force; then
      return 0
    fi
  done
}
230
# Run main over and over, pausing 60 seconds after each pass. The loop has
# no exit condition of its own.
loop-main() {
  while :; do
    main
    sleep 60
  done
}
237
# Entry point: a normal run is a single pass; --check keeps re-running main
# through loop-main.
if ! $check; then
  main
else
  loop-main
fi