From: Ian Kelling Date: Sun, 27 Mar 2022 10:02:27 +0000 (-0400) Subject: lots of fixes, automation for bitfolk X-Git-Url: https://iankelling.org/git/?a=commitdiff_plain;h=802e885e3e7fa3857f8bc4f54c261d5ca76f2454;p=distro-setup lots of fixes, automation for bitfolk --- diff --git a/bitfolk-chroot-install b/bitfolk-chroot-install index 37e77b7..a28efee 100755 --- a/bitfolk-chroot-install +++ b/bitfolk-chroot-install @@ -42,8 +42,7 @@ cat >/root/.ssh/authorized_keys <<'EOF' ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDX42yru/h6r6UDRy/VwVZjcYEmNLG5/SUjv7xwu43OaW0wL+uHYg2rkfn4Ygh5o1I5pgBh2SWg8TeWuVGhgL1SCuBzzeai/+58Sny065Qak+D2WjVOuRonRelh+CBA5EpNZPuiWQkoWdf9NACTBCbS2Zu7r8OOgRqu/ruaDNePlG5+U0Wlpy3oBnpbzQiuSA3AKMW30fsCJtOBjz5qQaiPbYEKJy3AOvtbq10wliKx9TpsTzrq8dKWs7PLhZnzqVCsaq6D95IzjqXcSpx4Cga5bn+YEuAnJQ53PGA5eO+hpz6HDmawTbJlaV/Dufb9bJ/ZZy1DXzs07yWRtTEY54/X ian@iankelling.org EOF - -# todo update this and hostname depending on host +# https://tools.bitfolk.com/wiki/IPv6 cat >/etc/network/interfaces </etc/hostname </etc/timezone +if [[ -L /etc/localtime ]]; then + ln -sf /usr/share/zoneinfo/${TIMEZONE} /etc/localtime +else + cp -f /usr/share/zoneinfo/${TIMEZONE} /etc/localtime +fi + + +echo $0 SUCCESS diff --git a/bitfolk-rescue-init b/bitfolk-rescue-init index 9ff0a30..938f0f2 100644 --- a/bitfolk-rescue-init +++ b/bitfolk-rescue-init @@ -8,35 +8,44 @@ ssh iankelling@iankelling.console.bitfolk.com destroy rescue + sudo -i mkdir -p /root/.ssh chmod 700 /root/.ssh cat >/root/.ssh/authorized_keys <<'EOF' ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDX42yru/h6r6UDRy/VwVZjcYEmNLG5/SUjv7xwu43OaW0wL+uHYg2rkfn4Ygh5o1I5pgBh2SWg8TeWuVGhgL1SCuBzzeai/+58Sny065Qak+D2WjVOuRonRelh+CBA5EpNZPuiWQkoWdf9NACTBCbS2Zu7r8OOgRqu/ruaDNePlG5+U0Wlpy3oBnpbzQiuSA3AKMW30fsCJtOBjz5qQaiPbYEKJy3AOvtbq10wliKx9TpsTzrq8dKWs7PLhZnzqVCsaq6D95IzjqXcSpx4Cga5bn+YEuAnJQ53PGA5eO+hpz6HDmawTbJlaV/Dufb9bJ/ZZy1DXzs07yWRtTEY54/X ian@iankelling.org EOF + apt update -apt -y install openssh-server +if [[ -e /usr/sbin/sshd ]]; then + systemctl restart ssh +else + apt -y install openssh-server +fi ##### in another terminal ###### -host=je -scp /b/ds/bitfolk* root@$host.b8.nz: -ssh root@$host ./bitfolk-rescue-install $host +h=bk + +scp -pr --chown=root:root /p/c/machine_specific/$h/filesystem/etc/ssh/ root@$h.b8.nz:/etc/ssh + +scp /b/ds/bitfolk* root@$h.b8.nz: + +# initially saved via: +# mkc /p/c/machine_specific/$h/filesystem/etc/ssh/ +# rsync -a root@$h:/etc/ssh/ssh_host* . +ssh root@$h ./bitfolk-rescue-install $h ### back to the 1st terminal -poweroff boot -# press ctrl ] +# after boot, press ctrl ] exit -jepush -# todo: lets copy the host keys around so we dont have to do this. -khfix je -sl root@je /a/bin/ds/distro-begin +h=bk +${h}push +sl root@$h /a/bin/ds/distro-begin -# todo, fix it so i can ssh to -sl je /a/bin/ds/distro-begin -sl je /a/bin/ds/distro-end +sl $h /a/bin/ds/dall diff --git a/bitfolk-rescue-install b/bitfolk-rescue-install index 3270a3b..28367e3 100755 --- a/bitfolk-rescue-install +++ b/bitfolk-rescue-install @@ -105,8 +105,9 @@ chrbind host=$1 cp /root/bitfolk-chroot-install /mnt +mkdir -p /mnt/etc/ssh +cp -a /etc/ssh/ssh_host* /mnt/etc/ssh chroot . /bitfolk-chroot-install $host poweroff -boot diff --git a/bk-backup b/bk-backup index cc25cec..24fa105 100755 --- a/bk-backup +++ b/bk-backup @@ -1,5 +1,7 @@ #!/bin/bash +# usage: $0 [restore] + if ! test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4 set -eE -o pipefail @@ -19,17 +21,22 @@ case $1 in ;; esac +# last checked 2022-03 version 23 +# https://docs.nextcloud.com/server/latest/admin_manual/maintenance/restore.html if $restore; then set -x for ncdir in /var/www/ncexpertpath /var/www/ncninja; do ncbase=${ncdir##*/} ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --on ||: # might not be running - rsync -ra /p/bkbackup/$ncbase/ root@$host:$ncdir || ret=$? + rsync -ravhi --numeric-ids /p/bkbackup/$ncbase/ root@$host:$ncdir || ret=$? # https://docs.nextcloud.com/server/20/admin_manual/configuration_server/occ_command.html#maintenance-commands-label ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:data-fingerprint - ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --on + ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --off done - rsync -ravi /p/bkbackup/m root@$host:/ + # the dovecot thing is not needed afaik, just a good practice. + ssh root@$host systemctl stop dovecot + rsync -ravi --numeric-ids /p/bkbackup/m root@$host:/ + ssh root@$host systemctl start dovecot exit 0 fi @@ -37,19 +44,16 @@ ret=0 if [[ $HOSTNAME == $MAIL_HOST ]]; then mkdir -p /p/bkbackup for ncdir in /var/www/ncexpertpath /var/www/ncninja; do - if [[ ! -d $ncdir ]]; then - continue - fi ncbase=${ncdir##*/} mkdir -p /p/bkbackup/$ncbase ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --on - rsync -ra --exclude=testignore --delete root@$host:$ncdir/{config,data,themes} /p/bkbackup/$ncbase || ret=$? + rsync --numeric-ids -ra --delete root@$host:$ncdir/{config,data,themes} /p/bkbackup/$ncbase || ret=$? ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --off if (( ret )); then echo "$0: error: failed rsync $ncdir" ret=1 fi done - rsync -ra --delete root@$host:/m /p/bkbackup + rsync --numeric-ids -ra --delete root@$host:/m /p/bkbackup fi exit $ret diff --git a/brc b/brc index 99b5c03..00a4862 100644 --- a/brc +++ b/brc @@ -1389,7 +1389,6 @@ safe_rename() { # warn and dont rename if file exists. } - sd() { sudo dd status=none of="$1" } @@ -1405,6 +1404,10 @@ ser() { s service $2 $1 fi } +serstat() { + systemctl -n 40 status "$@" +} + seru() { systemctl --user "$@"; } # like restart, but do nothing if its not already started srestart() { @@ -1940,6 +1943,16 @@ unset safe_term match_lhs use_color if [[ $- == *i* ]]; then + + case $HOSTNAME in + bk|je|li) + if [[ $EUID == 1000 ]]; then + system-status _ ||: + fi + ;; + esac + + # this needs to come before next ps1 stuff # this stuff needs bash 4, feb 2009, # old enough to no longer condition on $BASH_VERSION anymore diff --git a/brc2 b/brc2 index c233c92..e392cf5 100644 --- a/brc2 +++ b/brc2 @@ -1647,18 +1647,24 @@ enn() { sdnbash() { # systemd namespace bash local unit=$1 - m sudo nsenter -t $(systemctl show --property MainPID --value $unit') -n -m sudo -u $USER -i bash + m sudo nsenter -t $(systemctl show --property MainPID --value $unit) -n -m sudo -u $USER -i bash } mailnnbash() { - m sudo nsenter -t $(systemctl show --property MainPID --value mailnn') -n -m sudo -u $USER -i bash + m sudo nsenter -t $(systemctl show --property MainPID --value mailnn) -n -m sudo -u $USER -i bash } mailvpnbash() { m sudo nsenter -t $(pgrep -f "/usr/sbin/openvpn .* --config /etc/openvpn/.*mail.conf") -n -m sudo -u $USER -i bash } eximbash() { - m sudo nsenter -t $(pgrep -f "/usr/sbin/exim4 -bd -q30m -C /etc/exim4/my.conf"|h1) -n -m sudo -u $USER -i bash + local pid + pid=$(pgrep -f "/usr/sbin/exim4 -bd -q30m -C /etc/exim4/my.conf"|h1) + if [[ ! $pid ]]; then + echo "eximbash: failed to find exim pid. systemctl -n 30 status exim4:" + systemctl status exim4 + fi + m sudo nsenter -t $pid -n -m } spamnn() { local spamdpid @@ -1741,7 +1747,7 @@ vpn() { sudo systemd-tty-ask-password-agent } -ufix() { +fixu() { ls -lad /run/user/1000 s chmod 700 /run/user/1000; s chown iank.iank /run/user/1000 } diff --git a/dall b/dall new file mode 100755 index 0000000..3ed8d44 --- /dev/null +++ b/dall @@ -0,0 +1,12 @@ +#!/bin/bash + +if ! test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi +shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4 +set -eE -o pipefail +trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" exit status: $?, PIPESTATUS: ${PIPESTATUS[*]}" >&2' ERR + +readonly this_file="$(readlink -f -- "${BASH_SOURCE[0]}")" +readonly this_dir="${this_file%/*}" +cd "$this_dir" +./distro-begin +./distro-end diff --git a/distro-begin b/distro-begin index e2d1a87..84900fe 100755 --- a/distro-begin +++ b/distro-begin @@ -236,12 +236,13 @@ sudo sed -i --follow-symlinks -f - /etc/hosts <= 60 * 12 + labels: + severity: day + annotations: + summary: '12 minutes down' + + # 42 mins: enough for a 30 min queue run plus 12 + - alert: mailtest_check_vps + expr: |- + time() - mailtest_check_last_usec{job="tlsnode"} >= 60 * 42 + labels: + severity: prod + annotations: + summary: '42 minutes down' - - alert: mailtest_check + - alert: mailtest_check_mailhost expr: |- - time() - mailtest_check_last_usec > 60 * 12 + time() - max by (folder,from) (mailtest_check_last_usec{job="node"}) >= 60 * 12 labels: severity: day annotations: summary: '12 minutes down' # 42 mins: enough for a 30 min queue run plus 12 - - alert: mailtest_check + - alert: mailtest_check_mailhost expr: |- - time() - mailtest_check_last_usec > 60 * 42 + time() - max by (folder,from) (mailtest_check_last_usec{job="node"}) >= 60 * 42 labels: severity: prod annotations: - summary: '43 minutes down' + summary: '42 minutes down' + - alert: 1pmtest expr: hour() == 17 and minute() < 5 @@ -101,22 +134,26 @@ groups: summary: Prometheus daily test alert - -# alternate expression, to calculate if the alert would have fired is: +#### Inhibit notes #### +## Example of expressions to detect if the target_down alert +# fired in the last 24 hours. Initially, I thought his could +# be an alert which inhibits up_resets, but eventually I figured +# that doesn't make much sense, and the idea of using an alert +# that is not an indication of something wrong, only inhibits another +# alert, I think works better to integrate directly into the +# alert it would inhibit, this may mean a recording rule. That avoids +# an alert we have to ignore or filter out. +# +# Alternate expression, to calculate if the alert would have fired is: # min_over_time(sum_over_time(up[30m])[1d:]) == 0 # where 30m matches the for: time in target_down # -# sum_over_time is not needed, just convenience for graphing - - alert: target_down_inhibitor - expr: |- - sum_over_time(ALERTS{alertname="target_down"}[1d]) - labels: - severity: ignore - annotations: - summary: alert that indicates target_down alert fired in the last day - description: "VALUE = {{ $value }}" +# Note: for graphing, surround in the expression in sum_over_time() +# ALERTS{alertname="target_down",alertstate="firing"}[1d] +#### end Inhibit notes #### -# For targets where we alert except for longer downtimes, we + +# For targets where we alert only on long downtimes, we # still want to know if it is going down many times for short times over # a long period of time. But ignore reboots. # @@ -124,18 +161,16 @@ groups: # avg_over_time(node_systemd_unit_state{name="dynamicipupdate.service",state="active"}[1d]) < .95 - alert: up_resets expr: |- - resets(up[3d]) - changes(node_boot_time_seconds[3d]) > 15 + resets(up[2d]) - changes(node_boot_time_seconds[2d]) > 12 labels: severity: warn annotations: - summary: "Target has gone down {{ $value }} times in 3 days, > 15" - + summary: "Target has gone down {{ $value }} times in 2 days, > 12" # https://awesome-prometheus-alerts.grep.to/rules - # todo, we should probably group the prometheus alerts that indicate a # host-local problem. # eg, set a label alert-group: local-prom, then make a receiver that diff --git a/filesystem/etc/systemd/system/epanicclean.service b/filesystem/etc/systemd/system/epanicclean.service index 5a0167e..e5addfa 100644 --- a/filesystem/etc/systemd/system/epanicclean.service +++ b/filesystem/etc/systemd/system/epanicclean.service @@ -5,7 +5,7 @@ StartLimitIntervalSec=0 [Service] Type=simple -ExecStart=/usr/local/bin/sysd-mail-once -3 epanic-clean /usr/local/bin/epanic-clean +ExecStart=/usr/local/bin/epanic-clean Restart=always RestartSec=600 diff --git a/filesystem/usr/local/bin/myterm b/filesystem/usr/local/bin/myterm deleted file mode 100755 index 0ccb39c..0000000 --- a/filesystem/usr/local/bin/myterm +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -# using bin/sh for speed since the script is very simpl.e - -# these system76 systems have garbled display with konsole -# and some other apps like mumble. something about the intel -# graphics i think. -case $HOSTNAME in - sy|bo) - exec sakura "$@" - ;; - *) - exec konsole "$@" - ;; -esac diff --git a/i3-sway/common.conf b/i3-sway/common.conf index 29a25c4..5860ee4 100644 --- a/i3-sway/common.conf +++ b/i3-sway/common.conf @@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10 bindsym $mod+Shift+m border toggle bindsym $mod+j exec emacsclient -c -bindsym $mod+k exec myterm +bindsym $mod+k exec konsole bindsym $mod+l exec dmenu_run # note default is 27% on my system76. not sure if these # keybinds will screw up other laptop brightness keys. diff --git a/machine_specific/s76.hosts b/machine_specific/s76.hosts new file mode 100644 index 0000000..90a6702 --- /dev/null +++ b/machine_specific/s76.hosts @@ -0,0 +1,2 @@ +bo +sy diff --git a/machine_specific/s76/filesystem/etc/X11/xorg.conf.d/20-intel.conf b/machine_specific/s76/filesystem/etc/X11/xorg.conf.d/20-intel.conf new file mode 100644 index 0000000..da6c815 --- /dev/null +++ b/machine_specific/s76/filesystem/etc/X11/xorg.conf.d/20-intel.conf @@ -0,0 +1,7 @@ +# https://forums.linuxmint.com/viewtopic.php?f=208&t=224942#p1197049 +# prevents konsole from being borderline unusable on system76 intel graphics + i3 +Section "Device" + Identifier "Intel Graphics" + Driver "intel" + Option "TearFree" "true" +EndSection diff --git a/mail-cert-cron b/mail-cert-cron index cee7568..5b63b9a 100755 --- a/mail-cert-cron +++ b/mail-cert-cron @@ -20,7 +20,7 @@ case $HOSTNAME in $MAIL_HOST|bk) local_mx=mail.iankelling.org # ||: is to allow for temporary connection issues. - rsync ${opt[@]} -ogtL --chown=root:Debian-exim --chmod=640 \ + rsync "${opt[@]}" -ogtL --chown=root:Debian-exim --chmod=640 \ root@li.iankelling.org:/etc/letsencrypt/live/mail.iankelling.org/{fullchain.pem,privkey.pem} /etc/exim4 ||: if ! openssl x509 -checkend $(( 60 * 60 * 24 * 3 )) -noout -in /etc/exim4/fullchain.pem; then echo "$0: error!: cert rsync failed and it will expire in less than 3 days" diff --git a/mail-setup b/mail-setup index 0a41df0..5166faa 100755 --- a/mail-setup +++ b/mail-setup @@ -3,6 +3,9 @@ # Copyright (C) 2019 Ian Kelling # SPDX-License-Identifier: AGPL-3.0-or-later + +# todo: add a prometheus alert for dovecot. + # todo: handle errors like this: # Mar 02 12:44:26 kw systemd[1]: exim4.service: Found left-over process 68210 (exim4) in control group while starting unit. Ignoring. # Mar 02 12:44:26 kw systemd[1]: This usually indicates unclean termination of a previous run, or service implementation deficiencies. @@ -22,8 +25,6 @@ # todo: run mailping test after running, or otherwise # clear out terminal alert -# todo: reinstall bk with bigger filesystem - # todo: on bk, dont send email if mailvpn is not up # todo: mailtest-check should check on bk too @@ -385,11 +386,16 @@ EOF fi # light version of exim does not have sasl auth support. -pi-nostart exim4 exim4-daemon-heavy spamassassin openvpn unbound clamav-daemon wireguard +pi-nostart exim4 exim4-daemon-heavy spamassassin unbound clamav-daemon wireguard # note: pyzor debian readme says you need to run some initialization command # but its outdated. pi spf-tools-perl p0f postgrey pyzor razor jq moreutils certbot fail2ban +case $HOSTNAME in + je) : ;; + # not included due to using wireguard: openvpn + *) pi wget git unzip iptables ;; +esac # bad packages that sometimes get automatically installed pu openresolv resolvconf @@ -484,7 +490,6 @@ case $HOSTNAME in i /etc/systemd/system/wg-quick@wgmail.service.d/override.conf <&2 - exit 1 - fi - ;; -esac + +# With openvpn, I didn't get around to persisting the openvpn +# cert/configs into /p/c/machine_specific/bk, so I had this case to +# manually get the cert. However, we aren't using openvpn anymore, so it +# is commented out. +# +# case $HOSTNAME in +# bk) +# if [[ ! -e /etc/openvpn/client/mail.conf ]]; then +# echo "$0: error: first, on a system with /p/c/filesystem, run mail-setup, or the vpn-mk-client-cert line above this err" 2>&2 +# exit 1 +# fi +# ;; +# esac m rsync -aiSAX --chown=root:root --chmod=g-s /a/bin/ds/mail-cert-cron /usr/local/bin @@ -1556,17 +1567,23 @@ xioE3sYKdjOt+p6mlg3l8+OLtODEFPHDqwIBAg== -----END DH PARAMETERS----- EOF { + if [[ $HOSTNAME == "$MAIL_HOST" ]]; then cat <<'EOF' ssl_cert = config.php - m rm tmp.php + m rm -f tmp.php m sudo -u www-data php $ncdir/occ maintenance:update:htaccess list=$(sudo -u www-data php $ncdir/occ --output=json_pretty app:list) # user_external not compaible with nc 23 @@ -2279,11 +2296,12 @@ For logs, run: jr -u $ncbase EOF fi EOFOUTER + chmod +x /usr/local/bin/ncup mkdir -p /var/www/cron-errors chown www-data.www-data /var/www/cron-errors i /etc/cron.d/$ncbase </etc/cron.d/mailtest </usr/local/bin/send-test-forward <<'EOF' #!/bin/bash olds=( -$(/sbin/exiqgrep -o 260 -i -r '^(testignore@(iankelling\.org|zroe\.org|expertpathologyreview\.com|amnimal\.ninja|je\.b8\.nz)|jtuttle@gnu\.org)$') +$(/usr/sbin/exiqgrep -o 260 -i -r '^(testignore@(iankelling\.org|zroe\.org|expertpathologyreview\.com|amnimal\.ninja|je\.b8\.nz)|jtuttle@gnu\.org)$') ) if (( ${#olds[@]} )); then - /sbin/exim -Mrm "${olds[@]}" >/dev/null + /usr/sbin/exim -Mrm "${olds[@]}" >/dev/null fi EOF for test_from in ${test_froms[@]}; do diff --git a/pkgs b/pkgs index df99176..a68891d 100644 --- a/pkgs +++ b/pkgs @@ -4,6 +4,7 @@ # packages with the same name across distros. p1=( + bind9-host cryptsetup lvm2 mbuffer @@ -22,6 +23,7 @@ p2=( htop iptables mailutils + nano nmon needrestart ntp diff --git a/rootsshsync b/rootsshsync index 900a662..a77bf23 100755 --- a/rootsshsync +++ b/rootsshsync @@ -59,7 +59,7 @@ if [[ -e $user_ssh_dir/config ]]; then fi chown -R root:root /root/.ssh -rsync -t --chmod=755 --chown=root:root /a/bin/ds/hssh /usr/local/bin +rsync -tp --chmod=755 --chown=root:root /a/bin/ds/hssh /usr/local/bin if [[ -e /a/opt/btrbk/ssh_filter_btrbk.sh ]]; then install /a/opt/btrbk/ssh_filter_btrbk.sh /usr/local/bin diff --git a/subdir_files/.config/i3/config b/subdir_files/.config/i3/config index 01b5c2c..3ed4cdd 100644 --- a/subdir_files/.config/i3/config +++ b/subdir_files/.config/i3/config @@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10 bindsym $mod+Shift+m border toggle bindsym $mod+j exec emacsclient -c -bindsym $mod+k exec myterm +bindsym $mod+k exec konsole bindsym $mod+l exec dmenu_run # note default is 27% on my system76. not sure if these # keybinds will screw up other laptop brightness keys. diff --git a/subdir_files/.config/sway/config b/subdir_files/.config/sway/config index ba675c2..f0e45c4 100644 --- a/subdir_files/.config/sway/config +++ b/subdir_files/.config/sway/config @@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10 bindsym $mod+Shift+m border toggle bindsym $mod+j exec emacsclient -c -bindsym $mod+k exec myterm +bindsym $mod+k exec konsole bindsym $mod+l exec dmenu_run # note default is 27% on my system76. not sure if these # keybinds will screw up other laptop brightness keys. diff --git a/system-status b/system-status index f50d238..c11024b 100755 --- a/system-status +++ b/system-status @@ -7,6 +7,11 @@ if [ -z "$BASH_VERSION" ]; then echo "error: shell is not bash" >&2; exit 1; fi +if [[ $EUID != 1000 ]]; then + echo "$0: error, expected to be user 1000" + exit 1 +fi + source /a/bin/errhandle/err status_file=/dev/shm/iank-status @@ -28,6 +33,7 @@ v() { printf "%s\n" "$*" fi } +p() { printf "%s\n" "$*"; } # log-once COUNT NAME [MESSAGE] lo() { if type -p ifne &>/dev/null; then @@ -45,7 +51,6 @@ loday() { write-status() { chars=("${first_chars[@]}") - services=( epanicclean ) case $HOSTNAME in bk|je|li) : ;; @@ -55,28 +60,26 @@ write-status() { btrfsmaintstop dynamicipupdate ) + bads=() + if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then + for s in ${services[@]}; do + if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then + bads+=($s) + fi + done + chars+=(MYSERS) + fi + p ${bads[*]} | lo -240 mysers ;; esac - bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then - for s in ${services[@]}; do - if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then - bads+=($s) - fi - done - chars+=(MYSERS) - - fi - lo -240 mysers ${bads[*]} - - services=( - prometheus-node-exporter - prometheus-alertmanager - prometheus - ) case $HOSTNAME in kd) + services=( + prometheus-node-exporter + prometheus-alertmanager + prometheus + ) bads=() if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do @@ -86,27 +89,96 @@ write-status() { done chars+=(PROM) fi - lo -240 prom ${bads[*]} + p ${bads[*]} | lo -240 prom ;; esac - # clock us out in timetrap if are idle too long - if [[ -e /p/.timetrap.db ]]; then - export DISPLAY=:0 - if type -p xprintidle &>/dev/null && xidle=$(xprintidle 2>/dev/null); then - if [[ $xidle == [0-9]* ]]; then - sheet=$(sqlite3 /p/.timetrap.db "select sheet from entries where end is NULL;") - idle=300000 - if [[ $sheet == w ]]; then - idle=900000 - fi - if [[ $sheet && $xidle -gt $idle ]]; then - timetrap out - fi + + if [[ -e /a/bin/bash_unpublished/source-state ]]; then + # /a gets remounted due to btrbk, ignore error code for file doesnt exist + source /a/bin/bash_unpublished/source-state || [[ $? == 1 ]] + fi + if [[ $MAIL_HOST == "$HOSTNAME" ]]; then + + bouncemsg= + glob=(/m/md/bounces/new/*) + if [[ -e ${glob[0]} ]]; then + chars+=(BOUNCE) + bouncemsg="message in /m/md/bounces/new" + fi + p $bouncemsg | loday -1 bounce + # emails without the S (seen) flag. this only checks the last flag, + # but its good enough for me. + glob=(/m/md/alerts/{new,cur}/!(*,S)) + if [[ -e ${glob[0]} ]]; then + chars+=(A) + fi + + glob=(/m/md/daylert/{new,cur}/!(*,S)) + if [[ -e ${glob[0]} ]]; then + chars+=(DAY) + fi + + bbkmsg= + if [[ $(systemctl is-active btrbk.timer) != active ]]; then + chars+=(BTRBK.TIMER) + bbkmsg="not enabled" + fi + p "$bbkmsg" | lo -480 btrbk.timer + + ## check if last snapshot was within an hour + vol=o + # this section generally copied from btrbk scripts, but + # this part modified to speed things up by about half a second. + # I'm not sure if its quite as reliable, but it looks pretty safe. + # Profiled it using time and also adding to the top of the file: + # set -x + # PS4='+ $(date "+%2N") ' + # allow failure in case there are no snapshots yet. + # shellcheck disable=SC2012 + shopt -u nullglob + files=(/mnt/root/btrbk/$vol.20*) + shopt -s nullglob + snaps=() + if (( ${#files[@]} )); then + snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )) + fi + now=$(date +%s) + maxtime=0 + for s in ${snaps[@]}; do + file=${s##*/} + t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s) + if (( t > maxtime )); then + maxtime=$t fi + done + snapshotmsg= + if (( maxtime < now - 4*60*60 )); then + chars+=(OLD-SNAP) + snapshotmsg="/o snapshot older than 4 hours" fi - fi + p "$snapshotmsg" | lo -1 old-snapshot + + + # commented out, only using timetrap retrospectively. + # # clock us out in timetrap if are idle too long + # if [[ -e /p/.timetrap.db ]]; then + # export DISPLAY=:0 + # if type -p xprintidle &>/dev/null && xidle=$(xprintidle 2>/dev/null); then + # if [[ $xidle == [0-9]* ]]; then + # sheet=$(sqlite3 /p/.timetrap.db "select sheet from entries where end is NULL;") + # idle=300000 + # if [[ $sheet == w ]]; then + # idle=900000 + # fi + # if [[ $sheet && $xidle -gt $idle ]]; then + # timetrap out + # fi + # fi + # fi + # fi + fi if ip l show tunfsf &>/dev/null; then # this is for tracking dns over tls issue, which @@ -138,26 +210,7 @@ write-status() { if [[ $(find /var/mail -type f \! -empty -print -quit) ]]; then var_mail_msg="message in /var/mail" fi - loday -1 var_mail $var_mail_msg - - bouncemsg= - glob=(/m/md/bounces/new/*) - if [[ -e ${glob[0]} ]]; then - chars+=(BOUNCE) - bouncemsg="message in /m/md/bounces/new" - fi - loday -1 bounce $bouncemsg - # emails without the S (seen) flag. this only checks the last flag, - # but its good enough for me. - glob=(/m/md/alerts/{new,cur}/!(*,S)) - if [[ -e ${glob[0]} ]]; then - chars+=(A) - fi - - glob=(/m/md/daylert/{new,cur}/!(*,S)) - if [[ -e ${glob[0]} ]]; then - chars+=(DAY) - fi + p $var_mail_msg | loday -1 var_mail tmp=(/var/local/cron-errors/mailtest-check*) @@ -180,8 +233,8 @@ write-status() { case $HOSTNAME in # No point in emailing about the mailq on a host where we don't # check email. - $MAIL_HOST|bk) - loday -120 qlen $qmsg + $MAIL_HOST) + p $qmsg | loday -120 qlen ;; esac @@ -273,52 +326,6 @@ write-status() { # leave it up to epanic-clean to send email notification fi - if [[ -e /a/bin/bash_unpublished/source-state ]]; then - # /a gets remounted due to btrbk, ignore error code for file doesnt exist - source /a/bin/bash_unpublished/source-state || [[ $? == 1 ]] - fi - if [[ $MAIL_HOST == "$HOSTNAME" ]]; then - bbkmsg= - if [[ $(systemctl is-active btrbk.timer) != active ]]; then - chars+=(BTRBK.TIMER) - bbkmsg="not enabled" - fi - lo -480 btrbk.timer $bbkmsg - - ## check if last snapshot was within an hour - vol=o - # this section generally copied from btrbk scripts, but - # this part modified to speed things up by about half a second. - # I'm not sure if its quite as reliable, but it looks pretty safe. - # Profiled it using time and also adding to the top of the file: - # set -x - # PS4='+ $(date "+%2N") ' - # allow failure in case there are no snapshots yet. - # shellcheck disable=SC2012 - shopt -u nullglob - files=(/mnt/root/btrbk/$vol.20*) - shopt -s nullglob - snaps=() - if (( ${#files[@]} )); then - snaps=($(ls -1avdr "${files[@]}" 2>/dev/null |head -n1 || : )) - fi - now=$(date +%s) - maxtime=0 - for s in ${snaps[@]}; do - file=${s##*/} - t=$(date -d $(sed -r 's/(.{4})(..)(.{5})(..)(.*)/\1-\2-\3:\4:\5/' <<<${file#$vol.}) +%s) - if (( t > maxtime )); then - maxtime=$t - fi - done - snapshotmsg= - if (( maxtime < now - 4*60*60 )); then - chars+=(OLD-SNAP) - snapshotmsg="/o snapshot older than 4 hours" - fi - lo -1 old-snapshot $snapshotmsg - fi - if [[ ! -e $status_file || -w $status_file ]]; then if [[ -e /a/bin/bash_unpublished/source-state ]]; then cat /a/bin/bash_unpublished/source-state >$status_file @@ -328,7 +335,6 @@ write-status() { echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file fi fi - } # use this if we want to do something just once per minute first_chars=()