From b28eebdf9143aa17733f233b30b96f462008f3b6 Mon Sep 17 00:00:00 2001 From: Ian Kelling Date: Sun, 12 Jun 2022 14:10:43 -0400 Subject: [PATCH] some fixes, and dns debugging on bk --- filesystem/etc/prometheus/rules/iank.yml | 20 +++--- .../filesystem/etc/logrotate-fast.conf | 12 ++++ .../etc/systemd/system/logrotate-fast.service | 31 +++++++++ .../etc/systemd/system/logrotate-fast.timer | 14 ++++ .../etc/unbound/unbound.conf.d/ian.conf | 7 +- mail-setup | 64 ++++++++++++++----- mailtest-check | 21 +++--- 7 files changed, 133 insertions(+), 36 deletions(-) create mode 100644 machine_specific/bitfolk/filesystem/etc/logrotate-fast.conf create mode 100644 machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.service create mode 100644 machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.timer diff --git a/filesystem/etc/prometheus/rules/iank.yml b/filesystem/etc/prometheus/rules/iank.yml index 651eb00..f64322b 100644 --- a/filesystem/etc/prometheus/rules/iank.yml +++ b/filesystem/etc/prometheus/rules/iank.yml @@ -182,7 +182,7 @@ groups: labels: severity: day annotations: - summary: 'jr -u mailtest-check -e' + summary: 'jr -u mailtest-check -e -n 10000' - alert: mailtest_check_missing_dnswl expr: |- @@ -191,7 +191,7 @@ groups: labels: severity: day annotations: - summary: 'jr -u mailtest-check -e' + summary: 'jr -u mailtest-check -e -n 10000' # We expect to be getting metrics, if we come up and notice we have # any missing in the past, and it wasn't from a reboot, and we haven't @@ -203,13 +203,16 @@ groups: labels: severity: warn - - alert: 1pmtest - expr: hour() == 17 and minute() < 5 + # 10 am friday. but, do it 1 minute early so it is closer to actually + # firing at 10 am. 
+ - alert: dead_man_test + expr: |- + ( hour() == 13 and minute() >= 59 or hour() == 14 and minute() < 3 ) and day_of_week() == 5 for: 0m labels: severity: daytest annotations: - summary: Prometheus daily test alert + summary: Prometheus weekly test alert #### Inhibit notes #### @@ -278,6 +281,9 @@ groups: annotations: summary: Target down for 30m + # note PrometheusAllTargetsMissing is intentionally omitted because it + # is redundant to the above. + - alert: target_down expr: up{instance=~"kdwg:9101|bkex.b8.nz:9101|liex.b8.nz:9101|10.2.0.1:9100"} == 0 for: 5m @@ -294,10 +300,6 @@ groups: annotations: summary: MAIL_HOST likely down for 5m - -# note, the next upstream metric is intentionally omitted: -# https://github.com/samber/awesome-prometheus-alerts/issues/283 - - alert: PrometheusConfigurationReloadFailure expr: prometheus_config_last_reload_successful != 1 for: 30m diff --git a/machine_specific/bitfolk/filesystem/etc/logrotate-fast.conf b/machine_specific/bitfolk/filesystem/etc/logrotate-fast.conf new file mode 100644 index 0000000..d1170e6 --- /dev/null +++ b/machine_specific/bitfolk/filesystem/etc/logrotate-fast.conf @@ -0,0 +1,12 @@ +compress +/dev/shm/u.log { + # dunno if this is needed but it can avoid problems. 
+ delaycompress + su unbound unbound + rotate 20 + size 10M + # copied from clamav + postrotate + systemctl -q is-active unbound && systemctl kill --signal=SIGHUP unbound || true + endscript +} diff --git a/machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.service b/machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.service new file mode 100644 index 0000000..54bb56f --- /dev/null +++ b/machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.service @@ -0,0 +1,31 @@ +# modified from +# /lib/systemd/system/logrotate.service +[Unit] +Description=logrotate-fast +Documentation=man:logrotate(8) man:logrotate.conf(5) +ConditionACPower=true + +[Service] +Type=oneshot +ExecStart=/usr/sbin/logrotate /etc/logrotate-fast.conf + +# performance options +Nice=19 +IOSchedulingClass=best-effort +IOSchedulingPriority=7 + +# hardening options +# details: https://www.freedesktop.org/software/systemd/man/systemd.exec.html +# no ProtectHome for userdir logs +# no PrivateNetwork for mail delivery +# no ProtectKernelTunables for working SELinux with systemd older than 235 +# no MemoryDenyWriteExecute for gzip on i686 + +# iank, commented, we need /dev +#PrivateDevices=true + +PrivateTmp=true +ProtectControlGroups=true +ProtectKernelModules=true +ProtectSystem=full +RestrictRealtime=true diff --git a/machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.timer b/machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.timer new file mode 100644 index 0000000..962c7e8 --- /dev/null +++ b/machine_specific/bitfolk/filesystem/etc/systemd/system/logrotate-fast.timer @@ -0,0 +1,14 @@ +[Unit] +Description=logrotate-fast timer + +[Timer] +# we could programmatically get this via: +# timedatectl show --property=Timezone | sed 's/^[^=]*=//' +# or +# readlink /etc/localtime | sed -r 's,^.*/([^/]+/[^/]+)$,\1,' + +# every 5 minutes +OnCalendar=*-*-* *:00/5:00 + +[Install] +WantedBy=timers.target diff --git 
a/machine_specific/bitfolk/filesystem/etc/unbound/unbound.conf.d/ian.conf b/machine_specific/bitfolk/filesystem/etc/unbound/unbound.conf.d/ian.conf index 5b1cfef..7117bf9 100644 --- a/machine_specific/bitfolk/filesystem/etc/unbound/unbound.conf.d/ian.conf +++ b/machine_specific/bitfolk/filesystem/etc/unbound/unbound.conf.d/ian.conf @@ -13,8 +13,11 @@ server: ## This is very verbose, fills up 4g of logs in 8 hours on bk.b8.nz. I think ## it leads to spamassassin dns timeout (1 second) when the system first ## starts. -#verbosity: 4 -# +verbosity: 4 +logfile: /dev/shm/u.log +log-time-ascii: yes + + interface: 127.0.0.1 interface: ::1 diff --git a/mail-setup b/mail-setup index de9db48..5d18493 100755 --- a/mail-setup +++ b/mail-setup @@ -1249,13 +1249,6 @@ warn add_header = X-Spam_report: $spam_report add_header = X-Spam_action: $spam_action -warn - !hosts = +iank_trusted - !authenticated = plain_server:login_server - condition = ${if def:malware_name} - remove_header = Subject: - add_header = Subject: [Clamav warning: $malware_name] $h_subject - log_message = heuristic malware warning: $malware_name #accept # spf = pass:fail:softfail:none:neutral:permerror:temperror @@ -2281,8 +2274,10 @@ fwrite(STDOUT, "config.php - m rm -f tmp.php + e running php tmp.php + php tmp.php >config.php + # leave in place for debugging + #m rm -f tmp.php m sudo -u www-data php $ncdir/occ maintenance:update:htaccess list=$(sudo -u www-data php $ncdir/occ --output=json_pretty app:list) # user_external not compaible with nc 23 @@ -2316,14 +2311,12 @@ EOF systemctl enable --now $ncbase.timer i /usr/local/bin/ncup <<'EOFOUTER' #!/bin/bash -if ! test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi -shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4 -set -eE -o pipefail -trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" exit status: $?, PIPESTATUS: ${PIPESTATUS[*]}" >&2' ERR -ncbase=$1 -if ! 
php /var/www/$ncbase/updater/updater.phar -n; then - echo failed nextcloud update for $ncbase >&2 +source /usr/local/lib/err + +m() { printf "%s\n" "$*"; "$@"; } +err-cleanup() { +echo failed nextcloud update for $ncbase >&2 /sbin/exim -t <>/etc/exim4/conf.d/main/000_local <>/etc/cron.d/mailtest <