various updates
[distro-setup] / btrfsmaint
#!/bin/bash


# Everything below needs root: when started as another user, replace this
# process with the same script under sudo, keeping the environment (-E) and
# the original arguments.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

source /a/bin/errhandle/err

# Inspired by
# https://github.com/kdave/btrfsmaintenance


# Usage thresholds handed to `btrfs balance start` as -dusage=N / -musage=N;
# one balance run is started per listed value.
# The man page says a range could be used as well; a pattern such as
# 5..10 10..20 would seem logical, but it is unclear whether that would
# help at all.
dusage="5 10"
musage="5"
17
# Log a command line prefixed with "cron: " and then run it.
# When the global `dryrun` is true the command is only logged and the
# function returns 0; otherwise the command's exit status is returned.
e() {
  printf 'cron: %s\n' "$*"
  if $dryrun; then
    return 0
  fi
  "$@"
}
24
# Decide whether the machine counts as idle and store the answer in the
# global `idle` ("true" or "false").
#
# Every distinct logged-in user (from `users`) is asked for their X idle time
# via `sudo -u USER xprintidle` on display :0. The machine is idle only when
# no user reports an idle time below the limit.
#
# If xprintidle is not installed the function returns 0 without touching
# `idle`, so the caller's previous value stays in effect.
check-idle() {
  local idle_limit idle_time new_idle_time user

  type -p xprintidle &>/dev/null || return 0
  export DISPLAY=:0
  # 2 hours, a movie could run that long. xprintidle reports milliseconds,
  # hence the factor of 1000.
  idle_limit=$((1000 * 60 * 60 * 2))
  idle_time=$idle_limit
  while read -r user; do
    # `users` prints nothing useful when nobody is logged in.
    [[ $user ]] || continue
    # Failures (no X session for that user, etc.) are expected and ignored.
    # stdin is detached so sudo cannot consume the remaining user list.
    new_idle_time=$(sudo -u "$user" xprintidle 2>/dev/null </dev/null) || :
    # Only trust plain integers; anything else is treated as "no reading".
    if [[ $new_idle_time =~ ^[0-9]+$ ]] && (( new_idle_time < idle_time )); then
      idle_time=$new_idle_time
    fi
  done < <(users | tr " " "\n" | sort -u)
  if (( idle_time < idle_limit )); then
    idle=false
  else
    idle=true
  fi
}
43
# Print the help text to stdout and exit.
# Arguments: $1 - exit status to use (defaults to 0).
usage() {
  cat <<EOF
Usage: ${0##*/} [force|check|dryrun]
Do btrfs maintenance, or stop it if xprintidle shows an active user.

force   Run regardless of user idle status, on all disks, and scrub
        regardless of when the last scrub was run.
check   Only check whether a running scrub should be cancelled because
        a user is not idle, and mail an alert when the btrfs device
        error stats change. Repeats every 60 seconds until killed.
dryrun  Print the balance and scrub commands instead of running them.

Note: only the first argument is examined.
EOF
  exit "${1:-0}"
}
60
61
# Mode flags, each holding the command name "true" or "false" so they can be
# used directly as conditions. At most one is switched on by the first
# argument; no argument leaves all three off.
force=false
check=false
dryrun=false
case $1 in
  '')
    ;;
  check)
    check=true
    ;;
  force)
    force=true
    ;;
  dryrun)
    dryrun=true
    ;;
  *)
    echo "$0: error: unexpected arg" >&2
    usage 1
    ;;
esac
82
83
# Run one maintenance pass over the mounted btrfs filesystems.
#
# Globals read:    force, check, dryrun, dusage, musage
# Globals written: idle (also updated by check-idle)
#
# Normal mode: wait (up to 300 minutes) for the user to become idle, then for
# each filesystem run the balance steps and, if the last finished scrub is
# older than the scrub interval, a scrub. Unless `force` is set, the pass
# stops after the first filesystem that was actually scrubbed.
#
# Check mode: for each filesystem, compare `btrfs dev stats` against the saved
# copy and mail any difference, then cancel a running scrub if the user is
# not idle. No balance or scrub is started.
main() {
  local min max_min src mnt tmp stats_path pct
  local scrub_status last_scrub last_scrub_epoch
  local -a fnd srcs
  # The wiki recommends 30 days or so, but I'm going with 60 days.
  local scrub_interval_days=60

  idle=true
  if ! $force; then
    check-idle
    if ! $check; then
      min=0
      max_min=300
      # When the cron kicks in, we may not be idle (physically sleeping) yet, so
      # wait.
      while ! $idle && (( min < max_min )); do
        min=$(( min + 1 ))
        sleep 60
        check-idle
      done
      # If we've waited a really long time for idle, just give up.
      if (( min == max_min )); then
        return
      fi
    fi
  fi


  fnd=(findmnt --types btrfs --noheading)
  # One entry per distinct btrfs source device.
  mapfile -t srcs < <("${fnd[@]}" --output "SOURCE" --nofsroot | sort -u)
  for src in "${srcs[@]}"; do
    [[ $src ]] || continue
    mnt=$("${fnd[@]}" --output "TARGET" --first-only --source "$src")
    [[ $mnt ]] || continue

    #### begin look for diff in stats, eg: increasing error count ####

    # Only run for $check, since it runs in parallel to non-check, avoid
    # race condition.
    if $check; then
      tmp=$(mktemp)
      # if mnt is /, avoid making a buggy looking path
      stats_path=${mnt%/}/btrfs-dev-stats
      if [[ ! -e $stats_path ]]; then
        btrfs dev stats -c "$mnt" >"$stats_path" ||: # populate initial reading
      # Per btrfs-device(8), -c makes the exit status non-zero when any
      # counter is non-zero, so the comparison only happens when there are
      # errors to report.
      elif ! btrfs dev stats -c "$mnt" >"$tmp"; then
        if ! diff -q "$stats_path" "$tmp"; then
          exim -t <<EOF
From: root@$HOSTNAME.b8.nz
To: alerts@iankelling.org
Subject: btrfsmaintstop: btrfs dev stats -c $mnt

$(diff -u "$stats_path" "$tmp" ||:)
EOF
          # Keep the previous reading as .1 and store the new one.
          mv "$stats_path" "$stats_path.1"
          cat "$tmp" >"$stats_path"
        fi
      fi
      rm -f "$tmp"
    fi
    #### end look for diff in stats, eg: increasing error count ####

    if $check; then
      if ! $idle; then
        if $dryrun; then
          echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
        else
          btrfs scrub cancel "$mnt" &>/dev/null ||:
        fi
      fi
      continue
    fi

    # for comparing before and after balance.
    # the log is already fairly verbose, so commented.
    # e btrfs filesystem df $mnt
    # e df -H $mnt
    if btrfs filesystem df "$mnt" | grep -q "Data+Metadata"; then
      # Mixed data+metadata block groups: both filters go in one run.
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" -musage="$pct" "$mnt"
      done
    else
      e ionice -c 3 btrfs balance start -dusage=0 "$mnt"
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" "$mnt"
      done
      e ionice -c 3 btrfs balance start -musage=0 "$mnt"
      for pct in $musage; do
        e ionice -c 3 btrfs balance start -musage="$pct" "$mnt"
      done
    fi

    # Find the start time of the last finished scrub, if any.
    last_scrub=
    scrub_status=$(btrfs scrub status "$mnt")
    if printf "%s\n" "$scrub_status" | grep -i '^status:[[:space:]]*finished$' &>/dev/null; then
      last_scrub=$(printf "%s\n" "$scrub_status" | sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p')
    fi
    if [[ ! $last_scrub ]]; then
      # output from older versions, at least btrfs v4.15.1
      last_scrub=$(
        printf "%s\n" "$scrub_status" | \
          sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p'
      )
    fi
    if ! $force && [[ $last_scrub ]]; then
      if $dryrun; then
        echo "$0: last scrub finish for $mnt: $last_scrub"
      fi
      last_scrub_epoch=$(date --date="$last_scrub" +%s)
      # Skip the scrub if the last one is more recent than the interval.
      if (( last_scrub_epoch > EPOCHSECONDS - 60*60*24*scrub_interval_days )); then
        if $dryrun; then
          echo "$0: skiping scrub of $mnt, last was $(( (EPOCHSECONDS - last_scrub_epoch) / 60/60/24 )) days ago, < $scrub_interval_days days"
        fi
        continue
      fi
    fi
    # btrfsmaintenance does -c 2 -n 4, but I want lowest pri.
    e btrfs scrub start -Bd -c 3 "$mnt"

    # We normally only do one disk since this is meant to be run while I sleep
    # and if we try to do all disks, we invariably end up doing a scrub still
    # after I've woken up. So, just do one per day.
    if ! $force; then
      return 0
    fi
  done
}
205
# Call main over and over, pausing 60 seconds after each pass.
# Never returns on its own.
loop-main() {
  for (( ; ; )); do
    main
    sleep 60
  done
}
212
# Entry point: a single pass normally, the repeating loop in check mode.
if ! $check; then
  main
else
  loop-main
fi