watch mail queue length
[distro-setup] / system-status
1 #!/bin/bash
2 # Copyright (C) 2019 Ian Kelling
3 # SPDX-License-Identifier: AGPL-3.0-or-later
4
# usage: with no arguments, writes the status 4 times, 15 seconds apart.
# With any argument, writes the status once, prints the status file and exits.
6
# Refuse to continue under any shell other than bash. This check is
# written with POSIX test syntax so it also works in those shells.
[ -n "$BASH_VERSION" ] || {
  echo "error: shell is not bash" >&2
  exit 1
}

# Presumably installs the error handling used by the rest of the script.
source /a/bin/errhandle/err

# File that write-status rewrites on every pass.
status_file=/dev/shm/iank-status

# Globs that match nothing expand to nothing, and globs match dotfiles too.
shopt -s nullglob dotglob
14
# Short alias: hand every argument, unchanged, to the log-once program.
lo() {
  /usr/local/bin/log-once "$@"
}
16
#######################################
# Run a series of health checks and publish the result.
#
# Every failed check appends a short tag (for example "STALE!" or "q 3")
# to a list. At the end, the content of
# /a/bin/bash_unpublished/source-state is copied to $status_file, followed,
# when there is at least one tag, by a ps_char assignment that puts the
# tags in front of the existing $ps_char.
#
# Globals:   status_file (read), HOSTNAME (read), MAIL_HOST (read after
#            sourcing source-state)
# Arguments: none
# Outputs:   rewrites $status_file; reports details through lo; may
#            append to /tmp/panicloglog-<date>; changes directory to
#            /b/ds when ~/.local/conflink exists
#######################################
write-status() {
  # Everything is local. The *msg variables in particular must start
  # empty on each call: this function is called several times by one
  # process, and a message left over from an earlier call would be
  # passed to lo again after its condition had cleared.
  local -a chars=() glob=() snaps=() untracked=()
  local bouncemsg='' bbkmsg='' snapshotmsg=''
  local qlen f now fsec fmin fminplus loglog spamcount
  local vol newest file p maxtime
  local -i conflink_due=0

  glob=(/nocow/btrfs-stale/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("STALE!")
  fi

  glob=(/m/md/bounces/new/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("BOUNCE!")
    bouncemsg="message in /m/md/bounces/new"
  fi
  # Unquoted on purpose: an empty message expands to no argument.
  # shellcheck disable=SC2086
  lo -1 bounce $bouncemsg

  glob=(/m/md/alerts/new/* /m/md/alerts/cur/*)
  if [[ -e ${glob[0]} ]]; then
    chars+=("ALERT!")
  fi

  if [[ -e /nocow/user/mailtest-failure ]]; then
    chars+=("MAILPING!")
  fi

  # Number of messages in the exim queue; shown only when non-zero.
  qlen=$(/usr/sbin/exim -bpc)
  if ((qlen)); then
    chars+=("q $qlen")
  fi

  # Flag each distro stage whose make target is out of date or whose
  # file does not contain exactly "0".
  if ! make -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then
    chars+=("DISTRO-BEGIN!")
  fi

  if ! make -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then
    chars+=("DISTRO-END!")
  fi

  f=~/.local/conflink
  if [[ -e $f ]]; then
    cd /b/ds
    now=$(date +%s)
    fsec=$(stat -c%Y "$f")
    # Negative minute counts, used as "find -mmin -N" (younger than N).
    fmin=$(( (fsec - now) / 60 + 1 ))
    fminplus=$(( fmin + 60*24 ))
    # Filesystem files get copied, so find any newer than the last run.
    # The rest are heuristics:
    # Given the last time we added a file in git, is that newer than the
    # last conflink run.
    # Given new files not added to git, were they modified more recently
    # than the last conflink? But push their modification time back by a
    # day so we can develop them before needing to add them to git.
    if (( $(date -d "$(git log --diff-filter=ACR --format=%aD -1)" +%s) > fsec )); then
      conflink_due=1
    elif [[ $(find {/a/bin/ds,/p/c}{/filesystem,/machine_specific/"$HOSTNAME"/filesystem} \
                -mmin "$fmin" -type f -print -quit 2>/dev/null) ]]; then
      conflink_due=1
    else
      # NUL-delimited so that odd file names survive intact; "./" keeps
      # find from reading a leading "-" as an option.
      while IFS= read -r -d '' p; do
        untracked+=("./$p")
      done < <(git ls-files -o --exclude-standard -z)
      # Only run find when there is something to check: with no starting
      # point, GNU find would search the whole current directory.
      if (( ${#untracked[@]} )) \
        && [[ $(find "${untracked[@]}" -mmin "$fminplus" -type f -print -quit) ]]; then
        conflink_due=1
      fi
    fi
  fi

  if [[ ! -e $f || $(<"$f") != 0 ]]; then
    conflink_due=1
  fi

  # One tag, however many of the conditions above matched.
  if (( conflink_due )); then
    chars+=("CONFLINK!")
  fi

  ## Clean the paniclog, but only up to 4 times per day, or else we
  ## should investigate.
  # The size in bytes of today's marker file is the count: one byte is
  # appended per pass that found 'spam acl condition' in the paniclog.
  # NOTE(review): the marker has a predictable name in /tmp.
  loglog=/tmp/panicloglog-$(date --rfc-3339=date)
  if [[ -s $loglog ]]; then
    spamcount=$(stat -c%s "$loglog")
  else
    spamcount=0
  fi
  if (( spamcount < 4 )); then
    if grep -q 'spam acl condition' /var/log/exim4/paniclog; then
      printf . >>"$loglog"
    fi
    /a/bin/distro-setup/epanic-clean
  fi

  if [[ -s /var/log/exim4/paniclog ]]; then
    chars+=("PANIC!")
    tail -n 20 /var/log/exim4/paniclog | lo -1 paniclog
  else
    lo -1 paniclog
  fi

  source /a/bin/bash_unpublished/source-state
  if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
    if [[ $(systemctl is-active btrbk.timer) != active ]]; then
      chars+=("BTRBK.TIMER!")
      bbkmsg="btrbk.timer not enabled"
    fi
    # Unquoted on purpose, see the bounce message above.
    # shellcheck disable=SC2086
    lo -60 btrbk.timer $bbkmsg

    ## check if last snapshot was within an hour
    vol=o
    # This section is generally copied from btrbk scripts, but only the
    # newest snapshot name is converted to a time, to keep it fast.
    # I'm not sure if it's quite as reliable, but it looks pretty safe.
    # Profiled it using time and also adding to the top of the file:
    # set -x
    # PS4='+ $(date "+%2N") '
    snaps=(/mnt/root/btrbk/"$vol".20*)
    maxtime=0
    # With no snapshot at all, maxtime stays 0 and the check below fires.
    if [[ -e ${snaps[0]} ]]; then
      newest=$(printf '%s\n' "${snaps[@]}" | sort -V | tail -n1)
      file=${newest##*/}
      # Reshape the name's timestamp for date, for example
      # 20190101T120000+0000 becomes 2019-01-01T12:00:00+0000.
      maxtime=$(date -d "$(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<"${file#"$vol".}")" +%s)
    fi
    now=$(date +%s)
    if (( maxtime < now - 60*60 )); then
      chars+=("OLD-SNAPSHOT!")
      snapshotmsg="/o snapshot older than 1 hour"
    fi
    # Unquoted on purpose, see the bounce message above.
    # shellcheck disable=SC2086
    lo -1 old-snapshot $snapshotmsg
  fi

  # Publish: the state file first, then the tags, if any, prepended to
  # whatever ps_char already holds when the file is read.
  {
    cat /a/bin/bash_unpublished/source-state
    if [[ ${chars[*]} ]]; then
      printf '%s\n' "ps_char=\"${chars[*]} \$ps_char\""
    fi
  } >"$status_file"
}
# The first pass always happens.
write-status

# Given a non-empty first argument: print what was written and stop.
if [[ -n $1 ]]; then
  cat "$status_file"
  exit 0
fi

# Otherwise make three more passes, each after a 15 second pause.
for pass in 2 3 4; do
  sleep 15
  write-status
done