lots of updates, some t11 stuff
[distro-setup] / btrfsmaint
#!/bin/bash

# Everything below needs root: re-run this same script under sudo, keeping
# the environment and the original arguments, when started unprivileged.
if [[ $EUID != 0 ]]; then
  exec sudo -E "${BASH_SOURCE[0]}" "$@"
fi

source /a/bin/errhandle/err

# Inspired by
# https://github.com/kdave/btrfsmaintenance

# Space separated usage values that main passes to btrfs balance as
# -dusage= / -musage=, one balance run per value.
#
# The man page says we could also use a range; I suppose it would be
# logical to use a pattern like 5..10 10..20, but I don't know if this
# would help us at all.
dusage="5 10"
musage="5"
# Log a command line to stdout with a "cron: " prefix, then run it.
# When the global dryrun is true the command is only logged, not run.
# Arguments: the command and its arguments.
# Returns: 0 on a dry run, otherwise the status of the command.
e() {
  printf '%s\n' "cron: $*"
  if $dryrun; then
    return 0
  fi
  "$@"
}
24
# Decide whether any logged-in user has been active recently and record the
# answer in the global `idle` (the string "true" or "false").
#
# For every distinct name printed by `users`, xprintidle is run as that user
# against display :0. The smallest value reported is compared with a two
# hour limit expressed in milliseconds: below the limit means idle=false,
# otherwise idle=true. Users for whom xprintidle fails or prints something
# that is not a plain number are ignored.
#
# If xprintidle is not installed the function returns 0 without touching
# `idle`, so the caller's default stays in effect.
#
# Side effects: exports DISPLAY=:0.
check-idle() {
  local idle_limit idle_time new_idle_time user

  type -p xprintidle &>/dev/null || return 0
  export DISPLAY=:0
  # 2 hours, a movie could run that long.
  idle_limit=$((1000 * 60 * 60 * 2))
  idle_time=$idle_limit
  while read -r user; do
    # `users` prints an empty line when nobody is logged in.
    [[ $user ]] || continue
    # stdin comes from /dev/null so that sudo (or the command it runs) can
    # never eat the rest of the user list this loop is reading.
    new_idle_time=$(sudo -u "$user" xprintidle 2>/dev/null </dev/null) || :
    if [[ $new_idle_time =~ ^[0-9]+$ ]] && (( new_idle_time < idle_time )); then
      idle_time=$new_idle_time
    fi
  done < <(users | tr " " "\n" | sort -u)
  if (( idle_time < idle_limit )); then
    idle=false
  else
    idle=true
  fi
}
43
# Print the help text to stdout and exit the script.
# Arguments: $1 - exit status to use (defaults to 0).
usage() {
  cat <<EOF
Usage: ${0##*/} [force|check|dryrun]
Do btrfs maintenance (balance, then scrub), or stop it if xprintidle
shows a non-idle user.

force   Run regardless of user idle status, on all disks.
check   Only check if an existing maintenance should be cancelled due to
        a non-idle user, and mail an alert when btrfs dev stats change.
        Runs in a loop, checking every 60 seconds.
dryrun  Print the balance and scrub commands instead of running them.

With no argument: wait for the user to become idle, then do maintenance
on one disk.
EOF
  exit "${1:-0}"
}
59
60
# Mode flags. They hold the words "true"/"false" and are executed as
# commands in conditionals further down. Only the first argument is looked
# at; no argument leaves every flag false.
force=false
check=false
dryrun=false
case $1 in
  '') ;;
  check) check=true ;;
  force) force=true ;;
  dryrun) dryrun=true ;;
  *)
    echo "$0: error: unexpected arg" >&2
    usage 1
    ;;
esac
81
82
# Run one maintenance pass over the mounted btrfs filesystems.
#
# Globals read: force, check, dryrun, dusage, musage.
# Globals written: idle (here and through check-idle).
#
# Unless force is true, check-idle is consulted first. In normal mode (not
# check) the function then polls once a minute, for up to max_min minutes,
# until the user is idle, and returns without doing anything if that never
# happens.
#
# For each btrfs source device (first mount point only):
#   check mode:  compare `btrfs dev stats -c` with the reading saved in
#                <mnt>/btrfs-dev-stats and mail the diff if it changed;
#                then, if the user is not idle, cancel any running scrub.
#   other modes: run the balance passes, then start a scrub unless the
#                last finished one is newer than scrub_interval_days.
#                Without force, stop after the first filesystem scrubbed.
#
# Returns: 0, including when giving up on waiting for idle.
main() {
  local -a fnd sources
  local src mnt tmp stats_path pct scrub_status last_scrub
  local min max_min
  local scrub_interval_days=60

  idle=true
  if ! $force; then
    check-idle
    if ! $check; then
      min=0
      max_min=300
      # When the cron kicks in, we may not be idle (physically sleeping) yet,
      # so wait.
      while ! $idle && (( min < max_min )); do
        min=$(( min + 1 ))
        sleep 60
        check-idle
      done
      # If we've waited a really long time and are still not idle, just give
      # up. Test idle itself rather than the counter, so that becoming idle
      # on the very last poll still counts.
      if ! $idle; then
        return 0
      fi
    fi
  fi

  fnd=(findmnt --types btrfs --noheading)
  mapfile -t sources < <("${fnd[@]}" --output SOURCE --nofsroot | sort -u)
  for src in "${sources[@]}"; do
    [[ $src ]] || continue
    mnt=$("${fnd[@]}" --output TARGET --first-only --source "$src")
    [[ $mnt ]] || continue

    if $check; then
      #### begin look for diff in stats, eg: increasing error count ####

      # Only run for $check, since it runs in parallel to non-check, avoid
      # race condition.
      tmp=$(mktemp)
      # if mnt is /, avoid making a buggy looking path
      stats_path=${mnt%/}/btrfs-dev-stats
      if [[ ! -e $stats_path ]]; then
        btrfs dev stats -c "$mnt" >"$stats_path" || : # populate initial reading
      elif ! btrfs dev stats -c "$mnt" >"$tmp"; then
        # The stats command reported failure; alert only if the reading
        # differs from the saved one, then rotate the saved reading.
        if ! diff -q "$stats_path" "$tmp"; then
          exim -t <<EOF
From: root@$HOSTNAME.b8.nz
To: alerts@iankelling.org
Subject: btrfsmaintstop: btrfs dev stats -c $mnt

$(diff -u "$stats_path" "$tmp")
EOF
          mv "$stats_path" "$stats_path.1"
          cat "$tmp" >"$stats_path"
        fi
      fi
      rm -f "$tmp"
      #### end look for diff in stats, eg: increasing error count ####

      if ! $idle; then
        if $dryrun; then
          echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
        else
          btrfs scrub cancel "$mnt" &>/dev/null || :
        fi
      fi
      continue
    fi

    # for comparing before and after balance.
    # the log is already fairly verbose, so commented.
    # e btrfs filesystem df $mnt
    # e df -H $mnt

    # $dusage / $musage are deliberately unquoted: they are space separated
    # lists and each word is one balance pass.
    if btrfs filesystem df "$mnt" | grep -q "Data+Metadata"; then
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" -musage="$pct" "$mnt"
      done
    else
      e ionice -c 3 btrfs balance start -dusage=0 "$mnt"
      for pct in $dusage; do
        e ionice -c 3 btrfs balance start -dusage="$pct" "$mnt"
      done
      e ionice -c 3 btrfs balance start -musage=0 "$mnt"
      for pct in $musage; do
        e ionice -c 3 btrfs balance start -musage="$pct" "$mnt"
      done
    fi

    # Find the start time of the last finished scrub, if any.
    last_scrub=
    scrub_status=$(btrfs scrub status "$mnt")
    if grep -i '^status:[[:space:]]*finished$' <<<"$scrub_status" &>/dev/null; then
      last_scrub=$(sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p' <<<"$scrub_status")
    fi
    if [[ ! $last_scrub ]]; then
      # output from older versions, at least btrfs v4.15.1
      last_scrub=$(
        sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' <<<"$scrub_status"
      )
    fi
    if [[ $last_scrub ]]; then
      if $dryrun; then
        echo "$0: last scrub finish for $mnt: $last_scrub"
      fi
      last_scrub=$(date --date="$last_scrub" +%s)
      # if the last scrub is sooner than scrub_interval_days ago, skip.
      # the wiki recommends 30 days or so, but
      # I'm going with 60 days.
      if (( last_scrub > EPOCHSECONDS - scrub_interval_days*24*60*60 )); then
        if $dryrun; then
          echo "$0: skipping scrub of $mnt, last was $(( (EPOCHSECONDS - last_scrub) / 60/60/24 )) days ago, < $scrub_interval_days days"
        fi
        continue
      fi
    fi
    # -c 2 -n 4 is from btrfsmaintenance, does ionice
    e btrfs scrub start -Bd -c 2 -n 4 "$mnt"

    # We normally only do one disk since this is meant to be run while I sleep
    # and if we try to do all disks, we invariably end up doing a scrub still
    # after I've woken up. So, just do one per day.
    if ! $force; then
      return 0
    fi
  done
}
204
# Call main over and over, sleeping 60 seconds after each pass.
# The loop has no exit condition of its own.
loop-main() {
  for (( ; ; )); do
    main
    sleep 60
  done
}
211
# Entry point: check mode keeps polling through loop-main, every other mode
# does a single pass of main.
if ! $check; then
  main
else
  loop-main
fi