labels:
severity: day
annotations:
- summary: 'jr -u mailtest-check -e'
+ summary: 'jr -u mailtest-check -e -n 10000'
- alert: mailtest_check_missing_dnswl
expr: |-
labels:
severity: day
annotations:
- summary: 'jr -u mailtest-check -e'
+ summary: 'jr -u mailtest-check -e -n 10000'
# We expect to be getting metrics, if we come up and notice we have
# any missing in the past, and it wasn't from a reboot, and we haven't
labels:
severity: warn
- - alert: 1pmtest
- expr: hour() == 17 and minute() < 5
+ # 10 am friday. hour() and minute() are evaluated in UTC, so 14:00 UTC
+ # is 10 am only at UTC-4. start 1 minute early so the alert is closer
+ # to actually firing at 10 am.
+ - alert: dead_man_test
+ expr: |-
+ ( hour() == 13 and minute() >= 59 or hour() == 14 and minute() < 3 ) and day_of_week() == 5
for: 0m
labels:
severity: daytest
annotations:
- summary: Prometheus daily test alert
+ summary: Prometheus weekly test alert
#### Inhibit notes ####
annotations:
summary: Target down for 30m
+ # note PrometheusAllTargetsMissing is intentionally omitted because it
+ # is redundant to the above.
+
- alert: target_down
expr: up{instance=~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100"} == 0
for: 5m
annotations:
summary: MAIL_HOST likely down for 5m
-
-# note, the next upstream metric is intentionally omitted:
-# https://github.com/samber/awesome-prometheus-alerts/issues/283
-
- alert: PrometheusConfigurationReloadFailure
expr: prometheus_config_last_reload_successful != 1
for: 30m
--- /dev/null
+# modified from
+# /lib/systemd/system/logrotate.service
+#
+# Oneshot unit that runs logrotate against /etc/logrotate-fast.conf.
+[Unit]
+Description=logrotate-fast
+Documentation=man:logrotate(8) man:logrotate.conf(5)
+ConditionACPower=true
+
+[Service]
+Type=oneshot
+ExecStart=/usr/sbin/logrotate /etc/logrotate-fast.conf
+
+# performance options
+Nice=19
+IOSchedulingClass=best-effort
+IOSchedulingPriority=7
+
+# hardening options
+# details: https://www.freedesktop.org/software/systemd/man/systemd.exec.html
+# no ProtectHome for userdir logs
+# no PrivateNetwork for mail delivery
+# no ProtectKernelTunables for working SELinux with systemd older than 235
+# no MemoryDenyWriteExecute for gzip on i686
+
+# iank, commented, we need /dev
+#PrivateDevices=true
+
+PrivateTmp=true
+ProtectControlGroups=true
+ProtectKernelModules=true
+ProtectSystem=full
+RestrictRealtime=true
add_header = X-Spam_report: $spam_report
add_header = X-Spam_action: $spam_action
-warn
- !hosts = +iank_trusted
- !authenticated = plain_server:login_server
- condition = ${if def:malware_name}
- remove_header = Subject:
- add_header = Subject: [Clamav warning: $malware_name] $h_subject
- log_message = heuristic malware warning: $malware_name
#accept
# spf = pass:fail:softfail:none:neutral:permerror:temperror
var_export(\$CONFIG);
fwrite(STDOUT, ";\n");
EOF
- m php tmp.php >config.php
- m rm -f tmp.php
+ e running php tmp.php
+ php tmp.php >config.php
+ # leave in place for debugging
+ #m rm -f tmp.php
m sudo -u www-data php $ncdir/occ maintenance:update:htaccess
list=$(sudo -u www-data php $ncdir/occ --output=json_pretty app:list)
# user_external not compaible with nc 23
systemctl enable --now $ncbase.timer
i /usr/local/bin/ncup <<'EOFOUTER'
#!/bin/bash
-if ! test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi
-shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4
-set -eE -o pipefail
-trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" exit status: $?, PIPESTATUS: ${PIPESTATUS[*]}" >&2' ERR
-ncbase=$1
-if ! php /var/www/$ncbase/updater/updater.phar -n; then
- echo failed nextcloud update for $ncbase >&2
+source /usr/local/lib/err
+
+m() { printf "%s\n" "$*"; "$@"; }
+err-cleanup() {
+echo failed nextcloud update for $ncbase >&2
/sbin/exim -t <<EOF
To: alerts@iankelling.org
From: root@$(hostname -f)
For logs, run: jr -u $ncbase
EOF
+}
+
+# Refuse to run as anyone but www-data. Errors go to stderr, like the
+# failure message in err-cleanup.
+if [[ $(id -u -n) != www-data ]]; then
+ echo error: running as wrong user: $(id -u -n), expected www-data >&2
+ exit 1
fi
+
+# The single required argument is the nextcloud base dir, relative to
+# /var/www.
+if [[ ! $1 ]]; then
+ echo error: expected an arg, nextcloud relative base dir >&2
+ exit 1
+fi
+
+ncbase=$1
+echo running: php /var/www/$ncbase/updater/updater.phar -n
+m php /var/www/$ncbase/updater/updater.phar -n
+cd /var/www/$ncbase
+# m prints its arguments and then executes them, so the command must
+# start with php; a leading word like "running" would be executed as
+# the program name and the upgrade would never run.
+m php occ -n upgrade
EOFOUTER
chmod +x /usr/local/bin/ncup
# ** $MAIL_HOST|bk)
$MAIL_HOST|bk)
+
+ # no clamav on je, it has 1.5g memory and clamav uses most of it
+ i /etc/exim4/conf.d/clamav_data_acl <<'EOF'
+warn
+!hosts = +iank_trusted
+!authenticated = plain_server:login_server
+condition = ${if def:malware_name}
+remove_header = Subject:
+add_header = Subject: [Clamav warning: $malware_name] $h_subject
+log_message = heuristic malware warning: $malware_name
+EOF
+
cat >>/etc/exim4/conf.d/main/000_local <<EOF
# je.b8.nz will run out of memory with freshclam
av_scanner = clamd:/var/run/clamav/clamd.ctl
echo|i /etc/exim4/conf.d/rcpt_local_acl
echo|i /etc/exim4/conf.d/router/890_backup_copy
echo|i /etc/exim4/conf.d/main/000_local-nn
+ echo|i /etc/exim4/conf.d/clamav_data_acl
if $bhost_t; then
;;&
esac
+# for debugging dns issues
+case $HOSTNAME in
+ je|bk)
+ systemctl enable --now logrotate-fast.timer
+ ;;
+esac
+
# last use of $reload happens in previous block
rm -f /var/local/mail-setup-reload
test_tos=(testignore@expertpathologyreview.com testignore@je.b8.nz testignore@amnimal.ninja jtuttle@gnu.org)
cat >>/etc/cron.d/mailtest <<EOF
-0 13 * * * root echo "1pm alert. You are not in the matrix."
+# 10 am friday
+0 10 * * 5 root echo "weekly alert. You are not in the matrix."
2 * * * * root check-remote-mailqs |& log-once check-remote-mailqs
EOF
;;&
fi
#### end arg processing ####
-
-if ! $int; then
- sleep 60
-fi
+# we put this in to avoid dns errors that happen on reboot,
+# but I want to debug them.
+# if ! $int; then
+# sleep 60
+# fi
# TODO, get je to deliver the local mailbox: /m/md/INBOX
tmpfile=$(mktemp)
declare -i unexpected=0
declare -i missing_dnswl=0
+ declare -i dnsfail=0
for folder in ${folders[@]}; do
for from in ${froms[@]}; do
latest=
rm -f $resultfile
for r in ${results[@]}; do
case $r in
- DKIM_INVALID|T_SPF_TEMPERROR|T_SPF_HELO_TEMPERROR)
- missing_dnswl+=1
- ;;
+ # iank: for when we want to handle dns errors differently
+ # DKIM_INVALID|T_SPF_TEMPERROR|T_SPF_HELO_TEMPERROR)
+ # dnsfail+=1
+ # ;;
*)
unexpected=$(( unexpected + 1 ))
;;
# We expect dns failures from time to time, so
# we count them separately and alert differently.
case $miss in
- DKIM_VALID|DKIM_VALID_AU|DKIM_VALID_EF|SPF_HELO_PASS|SPF_PASS|RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH)
+ # iank: dns fail
+ # DKIM_VALID|DKIM_VALID_AU|DKIM_VALID_EF|SPF_HELO_PASS|SPF_PASS|
+ RCVD_IN_DNSWL_MED|DKIMWL_WL_HIGH)
missing_dnswl+=1
;;
*)