From 40dd151ec6ba75633c74568da59e35a45351f194 Mon Sep 17 00:00:00 2001 From: Ian Kelling Date: Fri, 25 Mar 2022 02:25:24 -0400 Subject: [PATCH] fixes, prometheus, lots of stuff --- bitfolk-chroot-install | 89 +++++++ bitfolk-rescue-init | 42 ++++ bitfolk-rescue-install | 112 +++++++++ bk-backup | 7 +- brc | 18 ++ brc2 | 22 +- btrfsmaint | 46 +++- check-remote-mailqs | 4 +- conflink | 23 +- distro-begin | 52 ++-- distro-end | 82 +++++-- dynamic-ip-update | 9 +- filesystem/etc/default/prometheus | 2 +- .../etc/default/prometheus-alertmanager | 12 +- .../alertmanager_templates/iank.tmpl | 10 + filesystem/etc/prometheus/file_sd/node.yml | 9 +- filesystem/etc/prometheus/file_sd/tlsnode.yml | 6 +- filesystem/etc/prometheus/rules/iank.yml | 226 ++++++++++++------ .../etc/systemd/system/btrfsmaintstop.service | 4 +- .../systemd/system/dynamicipupdate.service | 4 +- .../etc/systemd/system/epanicclean.service | 2 +- .../prometheus-alertmanager.d/restart.conf | 11 + .../systemd/system/prometheus.d/restart.conf | 8 + .../etc/systemd/system/systemstatus.service | 4 +- filesystem/usr/local/bin/myterm | 14 ++ filesystem/usr/local/bin/myupgrade | 7 +- i3-sway/common.conf | 2 +- install-my-scripts | 1 - .../etc/default/prometheus-node-exporter | 0 mail-setup | 20 +- mount-latest-subvol | 2 +- pkgs | 9 +- primary-setup | 5 + rootsshsync | 2 +- subdir_files/.config/i3/config | 2 +- subdir_files/.config/sakura/sakura.conf | 2 + subdir_files/.config/sway/config | 2 +- system-status | 57 +++-- 38 files changed, 724 insertions(+), 205 deletions(-) create mode 100755 bitfolk-chroot-install create mode 100644 bitfolk-rescue-init create mode 100755 bitfolk-rescue-install create mode 100644 filesystem/etc/prometheus/alertmanager_templates/iank.tmpl create mode 100644 filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf create mode 100644 filesystem/etc/systemd/system/prometheus.d/restart.conf create mode 100755 filesystem/usr/local/bin/myterm rename {filesystem => 
machine_specific/kd/filesystem}/etc/default/prometheus-node-exporter (100%) diff --git a/bitfolk-chroot-install b/bitfolk-chroot-install new file mode 100755 index 0000000..37e77b7 --- /dev/null +++ b/bitfolk-chroot-install @@ -0,0 +1,89 @@ +#!/bin/bash + +if ! test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi +shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4 +set -eE -o pipefail +trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" exit status: $?, PIPESTATUS: ${PIPESTATUS[*]}" >&2' ERR + + +host=$1 + +case $host in + je) + ip6=2001:ba8:1f1:f09d + ip4=85.119.82.128 + ;; + bk) + ip6=2001:ba8:1f1:f0c9 + ip4=85.119.83.50 + ;; +esac + +debconf-set-selections <<'EOF' +locales locales/default_environment_locale select en_US.UTF-8 +locales locales/locales_to_be_generated multiselect en_US.UTF-8 UTF-8 +EOF + +# /a/bin/fai/fai/config/hooks/updatebase.UBUNTU +debconf --owner=locales sh -c ' + . /usr/share/debconf/confmodule + db_version 2.0 + db_get locales/locales_to_be_generated && + mkdir -p /var/lib/locales/supported.d && + echo "$RET" > /var/lib/locales/supported.d/local' +dpkg-reconfigure -fnoninteractive locales + +apt -y remove --purge --auto-remove netplan.io libnetplan0 +apt update +apt -y install linux-virtual-hwe-20.04 grub-pc-bin openssh-server ifupdown rsync +mkdir -p /root/.ssh +chmod 700 /root/.ssh +cat >/root/.ssh/authorized_keys <<'EOF' +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDX42yru/h6r6UDRy/VwVZjcYEmNLG5/SUjv7xwu43OaW0wL+uHYg2rkfn4Ygh5o1I5pgBh2SWg8TeWuVGhgL1SCuBzzeai/+58Sny065Qak+D2WjVOuRonRelh+CBA5EpNZPuiWQkoWdf9NACTBCbS2Zu7r8OOgRqu/ruaDNePlG5+U0Wlpy3oBnpbzQiuSA3AKMW30fsCJtOBjz5qQaiPbYEKJy3AOvtbq10wliKx9TpsTzrq8dKWs7PLhZnzqVCsaq6D95IzjqXcSpx4Cga5bn+YEuAnJQ53PGA5eO+hpz6HDmawTbJlaV/Dufb9bJ/ZZy1DXzs07yWRtTEY54/X ian@iankelling.org +EOF + + +# todo update this and hostname depending on host +cat >/etc/network/interfaces < /proc/sys/net/ipv6/conf/default/accept_ra + post-up echo 0 > /proc/sys/net/ipv6/conf/all/accept_ra 
+ post-up echo 0 > /proc/sys/net/ipv6/conf/$IFACE/accept_ra + post-up echo 0 > /proc/sys/net/ipv6/conf/default/autoconf + post-up echo 0 > /proc/sys/net/ipv6/conf/all/autoconf + post-up echo 0 > /proc/sys/net/ipv6/conf/$IFACE/autoconf +EOF + +cat >/etc/fstab <<'EOF' +/dev/xvda1 / ext4 noatime,nodiratime 0 1 +/dev/xvdb1 none swap nofail,x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s,sw 0 0 +EOF + +cat >> /etc/default/grub <<'EOF' +GRUB_CMDLINE_LINUX_DEFAULT="" +GRUB_CMDLINE_LINUX="console=hvc0" +EOF + +update-grub + +cat >/etc/systemd/resolved.conf.d/servers.conf <<'EOF' +[Resolve] +DNS=85.119.80.232 85.119.80.233 +Domains=~. +EOF + +cat >/etc/hostname </root/.ssh/authorized_keys <<'EOF' +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDX42yru/h6r6UDRy/VwVZjcYEmNLG5/SUjv7xwu43OaW0wL+uHYg2rkfn4Ygh5o1I5pgBh2SWg8TeWuVGhgL1SCuBzzeai/+58Sny065Qak+D2WjVOuRonRelh+CBA5EpNZPuiWQkoWdf9NACTBCbS2Zu7r8OOgRqu/ruaDNePlG5+U0Wlpy3oBnpbzQiuSA3AKMW30fsCJtOBjz5qQaiPbYEKJy3AOvtbq10wliKx9TpsTzrq8dKWs7PLhZnzqVCsaq6D95IzjqXcSpx4Cga5bn+YEuAnJQ53PGA5eO+hpz6HDmawTbJlaV/Dufb9bJ/ZZy1DXzs07yWRtTEY54/X ian@iankelling.org +EOF +apt update +apt -y install openssh-server + + +##### in another terminal ###### + +host=je +scp /b/ds/bitfolk* root@$host.b8.nz: +ssh root@$host ./bitfolk-rescue-install $host + +### back to the 1st terminal + +poweroff +boot + +# press ctrl ] +exit + +jepush +# todo: lets copy the host keys around so we dont have to do this. +khfix je +sl root@je /a/bin/ds/distro-begin + +# todo, fix it so i can ssh to +sl je /a/bin/ds/distro-begin +sl je /a/bin/ds/distro-end diff --git a/bitfolk-rescue-install b/bitfolk-rescue-install new file mode 100755 index 0000000..3270a3b --- /dev/null +++ b/bitfolk-rescue-install @@ -0,0 +1,112 @@ +#!/bin/bash + +# assumes we've partitioned /dev/xvda1 + +if ! 
test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi +shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4 +set -eE -o pipefail +trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" exit status: $?, PIPESTATUS: ${PIPESTATUS[*]}" >&2' ERR + +# already did this to ssh in +#apt update +apt install -y mmdebstrap + +wipefs -a /dev/xvda1 + +mkfs.ext4 /dev/xvda1 + + +#apt-key export B138CA450C05112F +cat >trisquel.key <<'EOF' +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFhxcQMBEADaT9jTxXNbmjx7kZdTK7JCFx2OAeSS0+XltJnGOPnd6Vj4W1u2 +QLReYD1rVVYA8kneT3VjvZpKO/Ho3TbQZ4E8hNDEwaVmKnTfrmptIzv44cHJexsZ +eTol74rac/pC/oXCHGINWLflfyQt8iB0dGSEPjtDXvYNFTwBYrQDI9apO5JVWvCB +qLaQdXU+MCsTaD5OZ4bJ2dQleI34UGea+NrrnAZP68d5hsLL+WTa65fhB3Bws8zc +1v+JhVJhLYNQOcTHRXU7ieUN2zy+JzbD/10XV64YZQseEkhXG8LMRvfbTDD/SigD +wKViVFkDa7NFfmpOE9kaF6Nh/XrWmMJjG49KtYUln/G2XCV4TpJrbrWW0OxGqki+ +HH/36N63CZR4lj0EENFQduikTq1LeyQBXQSccwH9FJEI/4Uz5uuVM+tviScmgWEC +YXo7PdCoUUJDiR6Uma4COAYOTHM/7C5wVofkH1mq5fSz6rbBNIDIoy/W6GrN41s0 +WOl+z9ZgNlY1qtvpwSDenY9WERLajUAJKVTAwI2mvIETo+q8b2J8iIlfI8KkW/+q +qt87BkFmo2Acgh9l0O1j/Ysp5p7OtCjz8uuO+WzbBo6RlXafx/9e6QoatbtwSBUp +4W3w2/VANRYtL5DyDCbcuwMk77r9jXp1913sTFlin4xIs840gcVahetA+QARAQAB +tDFUcmlzcXVlbCBHTlUvTGludXggPHRyaXNxdWVsLWRldmVsQHRyaXNxdWVsLmlu +Zm8+iQI2BBMBCgAhBQJYcXEDAhsDBQsJCAcDBRUKCQgLBRYDAgEAAh4BAheAAAoJ +ELE4ykUMBREv6NUP9jTl1CZKHqL3NmF2Df/ID+za7YO5IebxbzRC9vkjrWSuoMpw +xJ/U5pBCsz0bDewJXMx0XeSNgo/WAzWoPmh3PTUXKhLjlGS2DII96XXbMy2zi+3r ++apIY3wedYkC/BiK9w8pGXGGlTXTo9zitWZC2/yWC4I9W2818mMJPXpQjvkzqdwU +UlV54fpnqoMNsLFEa1w1ahDerdCTe0Azrr+3YrKaQ287MpkWwO/Cf/yYg0UhbDih +FFMZ4Wa9aywvxQ86khghOafLLkIHcakMBdVRodym00bGeAjeNHnMffDi3k8tfejk +g6iLVrZf21+KsVfV+PLX4QQsPCR/dlneKKCAEPh+awserncssizx2/ujhvTd7z3l +tXGG7UcQP7fYTBWNkU7+ddMOWp26hOsINt0NyxhiGT2ZPEy1vpZ1H80rlaMkiISH +Z56SCfcHGuEHlkDKdz7ZsS8gU+zqMAnNEDb6UrqZZbhJcR3N7DsTQC/okoF8egDM +nHD9pUdDakPungnt6j7eLA6Ogca58mbIvwsQ+Qn9Urcd0m30to6WCTlj4jKsrMy/ 
+QtbyVSc/G3PZXVqP8xWIpuZtu0eMx+NjCKWmOYvTgIjbjLp0z801Weexn9uva+1z +5nRy+00aOddoLhtXqNFxNS94gXvH3D6ZJ2ejADooEBiqk1M+KWFtOsW01QG5Ag0E +WHFxAwEQALAKjsSSREoYjswMG1/znBkoNz199DkKJ3DnOk5NulkZcAoHeQVLnv2M +/1qycG4ndoIkINdz37fKdFziEJd4cBSQ+3gNollaxM2x7KdF7M13Z4YgFgg40qxO +8id8CSLga2klnFU8aa5PtRPYd4XZ5azpxzCRF8u+1ojM+rLAO0hKLGDhBqjKFvG4 +ASeX14F1R2yiGvZU2lQKQu2ZIk9IqN7M0IsCEh1O8+GNd7lCTFyvAYK0ai0dg9Q5 +F6X2YvQVYDik7rOuP6D6oUmGXufi2vc0OxFX5dBHa4z7XR0BRzg9VtkUerHSbVPI +c+3mgG5+QmlD+3NKYqiTZvKOWQbgKD/Pg0E0hqw8IjSThmge8XQcTh6qhW8ww0Gz +ha6HN0At5kMGbQqsTARjfgjhJZdyjA09NGYu1KVKDrKMrN9le9tO05ztZeP3y5My +S9LaWDE6Flm0BBqkkrHDk+9ID/qDixe/3ZCppu3dJsCF8aaG+sIQjxlMAeXtKOLl +ZuQbPaVJbQXElwZo3Nz20N2RAZJLXycCev7EbC8Afpg6TYjlJyJX9uyKxDv+QORG +RJ5vFA4evNCmUrS0PpcodJxk5TMSuR9vRuvT1jVVMe18T1F74XRqTW6xizC7EEM8 +X5QLDuVMRErSUPfcNYLTGJAvPTQ/EgU5aK+H1qv1EEbXeMiuksdvABEBAAGJAh8E +GAEKAAkFAlhxcQMCGwwACgkQsTjKRQwFES/C0A//aT9JDbwF4JYgyxQuPuxb8G/e +9thHNBhPmGL7gpyGzUW3q/c6HHnFxT7YPA37fsN/JD9Mcdx2rRFhz0XVR6cfdQZy +299s2/aX4Tu4FbMnmM+Du5uFFgStJA7LjaacHn6MxEohUeZAL7LMYiUovbwnsaiP +0sPhLaMrOQkRL/9mEKJiNbn6r/xX4xegzYNqoNdDKbcARaAzm5AH03Mmbc7Ss+OZ +4v/7vlcUnyEZ2c4jazP7W+pGWIw9f3SqnIxuCeDrCD35IFsUrE27dbtaNpkKw9zF +lfaEC+6PAI7M78gg2RNvaurCJR5B7bENrobf0lxbYGLGFcOIqTXkbuWjjO3eI/5Q +rmnO8Uy41Zos03Gsa6QkQ4p6OtVN4hHLxXkirs31cIocPqiJ7Vi+OH8stMNukvVT +dgnuw4dbPEhDnrFREDNSuRtV+2Lxl4JLr7gQUQDZKEf8cYZUAdN69dcW48Ugdvgu +6cRDVWakfim6kvZiQ0vxGxGM02V3RdhhZqrwXXYUPyyWMW230IjYc9cYQ+3C/1K4 +MUUeMjKDMPQ/jlUiMjZeE+X0W/TaUj8uCOJ4M6+oYMqwUECPSFe9Of7VTKhB3+Ex +wGEtYWJUfhuYu8Tph2GZmud0vz4+ugpkliFVliGJfPPJ1EfgAAiUUvomoIXKsynV +McDbwCjFQn2iazszZsg= +=UAIm +-----END PGP PUBLIC KEY BLOCK----- +EOF + +apt-key add trisquel.key + +mount /dev/xvda1 /mnt + +mmdebstrap nabia /mnt - <<'EOF' +deb http://archive.trisquel.org/trisquel/ nabia main +deb-src http://archive.trisquel.org/trisquel/ nabia main + +deb http://archive.trisquel.org/trisquel/ nabia-updates main +deb-src http://archive.trisquel.org/trisquel/ nabia-updates main + +deb 
http://archive.trisquel.info/trisquel/ nabia-security main +deb-src http://archive.trisquel.info/trisquel/ nabia-security main + +deb http://archive.trisquel.org/trisquel/ nabia-backports main +deb-src http://archive.trisquel.org/trisquel/ nabia-backports main +EOF + +cd /mnt + +chrbind() { + local d + # dev/pts needed for pacman signature check + for d in dev proc sys dev/pts; do + [[ -d $d ]] + if ! mountpoint $d &>/dev/null; then + mount -o bind /$d $d + fi + done +} +chrbind + +host=$1 +cp /root/bitfolk-chroot-install /mnt +chroot . /bitfolk-chroot-install $host + +poweroff + +boot diff --git a/bk-backup b/bk-backup index 3621e76..cc25cec 100755 --- a/bk-backup +++ b/bk-backup @@ -37,14 +37,17 @@ ret=0 if [[ $HOSTNAME == $MAIL_HOST ]]; then mkdir -p /p/bkbackup for ncdir in /var/www/ncexpertpath /var/www/ncninja; do + if [[ ! -d $ncdir ]]; then + continue + fi ncbase=${ncdir##*/} mkdir -p /p/bkbackup/$ncbase ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --on rsync -ra --exclude=testignore --delete root@$host:$ncdir/{config,data,themes} /p/bkbackup/$ncbase || ret=$? 
ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --off if (( ret )); then - echo "$0: error: failed rsync $ncdir" - ret=1 + echo "$0: error: failed rsync $ncdir" + ret=1 fi done rsync -ra --delete root@$host:/m /p/bkbackup diff --git a/brc b/brc index 00f2172..99b5c03 100644 --- a/brc +++ b/brc @@ -397,6 +397,24 @@ b() { c - } +vp9() { + in=$PWD/$1 + + if [[ $2 ]]; then + out=$PWD/$2 + else + out=$PWD/vp9/$1 + fi + cd $(mktemp -d) + pwd + ffmpeg -threads 0 -i $in -g 192 -vcodec libvpx-vp9 -vf scale=-1:720 -max_muxing_queue_size 9999 -b:v 750K -pass 1 -an -f null /dev/null && \ + ffmpeg -y -threads 0 -i $in -g 192 -vcodec libvpx-vp9 -vf scale=-1:720 -max_muxing_queue_size 9999 -b:v 750K -pass 2 -c:a libvorbis -qscale:a 5 $out + cd - +} + +utcl() { # utc 24 hour time to local hour 24 hour time + echo "print( ($1 $(date +%z | sed -r 's/..$//;s/^(-?)0*/\1/')) % 24)"|python3 +} # c. better cd if type -p wcd &>/dev/null; then diff --git a/brc2 b/brc2 index 6b97783..c233c92 100644 --- a/brc2 +++ b/brc2 @@ -502,24 +502,23 @@ EOF done } bindpushb8() { - dsign iankelling.org expertpathologyreview.com zroe.org amnimal.ninja lipush for h in li bk; do m sl $h <<'EOF' source ~/.bashrc -m dnsup m dnsb8 EOF done } dnsup() { - conflink + conflink -f m ser reload bind9 } dnsb8() { local f=/var/lib/bind/db.b8.nz ser stop bind9 + sleep 1 sudo rm -fv $f.jnl sudo install -m 644 -o bind -g bind /p/c/machine_specific/vps/bind-initial/db.b8.nz $f ser restart bind9 @@ -1075,8 +1074,8 @@ Address = 10.8.0.$ipsuf/24 PostUp = ping -c1 10.8.0.1 ||: [Peer] -# li -PublicKey = zePGl7LoS3iv6ziTI/k8BMh4L3iL3K2t9xJheMR4hQA= +# li. called wgmail on that server +PublicKey = CTFsje45qLAU44AbX71Vo+xFJ6rt7Cu6+vdMGyWjBjU= AllowedIPs = 10.8.0.0/24 Endpoint = 72.14.176.105:1194 PersistentKeepalive = 25 @@ -1084,7 +1083,7 @@ EOF umask $umask_orig # old approach. systemd seems to work fine and cleaner. 
rm -f ../network/interfaces.d/wghole - cedit -q $host /p/c/machine_specific/li/filesystem/etc/wireguard/wghole.conf </dev/null || return 0 @@ -39,7 +44,7 @@ check-idle() { usage() { cat <&2 usage 1 @@ -93,8 +102,12 @@ main() { fi if ! $idle; then - btrfs scrub cancel $mnt &>/dev/null ||: - continue + if $dryrun; then + echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt" + else + btrfs scrub cancel $mnt &>/dev/null ||: + continue + fi fi if $check; then continue @@ -118,20 +131,31 @@ main() { e ionice -c 3 btrfs balance start -musage=$usage $mnt done fi - # e btrfs filesystem df $mnt - # e df -H $mnt - date=$( - btrfs scrub status $mnt | \ - sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' - ) + date= + scrub_status=$(btrfs scrub status $mnt) + if printf "%s\n" "$scrub_status" | grep -i '^status:[[:space:]]*finished$' &>/dev/null; then + date=$(printf "%s\n" "$scrub_status" | sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p') + fi + if [[ ! $date ]]; then + # output from older versions, at least btrfs v4.15.1 + date=$( + printf "%s\n" "$scrub_status" | \ + sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p' + ) + fi if [[ $date ]]; then + if $dryrun; then + echo "$0: last scrub finish for $mnt: $date" + fi date=$(date --date="$date" +%s) # if date is sooner than 90 days ago # the wiki recommends 30 days or so, but # it makes the comp lag like shit for a day, # so I'm going with 90 days. 
if (( date > $(date +%s) - 60*60*24*30 )); then - echo "cron: skiping scrub of $mnt" + if $dryrun; then + echo "$0: skiping scrub of $mnt, last was $(( ($(date +%s) - date) / 60/60/24 )) days ago, < 30 days" + fi continue fi fi diff --git a/check-remote-mailqs b/check-remote-mailqs index 3caaa61..6e9cf7c 100755 --- a/check-remote-mailqs +++ b/check-remote-mailqs @@ -27,8 +27,8 @@ for h in bk je li frodo kwwg x3wg x2wg kdwg sywg; do if [[ -s $statefile ]]; then logsec=$(date +%s -d "$(head -n1 $statefile | awk '{print $1,$2}')") nowsec=$(date +%s) - if (( logsec < nowsec - 60*60*48 )); then - echo $0: host $h ssh /usr/local/bin/check-mailq fail for over 48 hours + if (( logsec < nowsec - 60*60*20 )); then + echo $0: host $h ssh /usr/local/bin/check-mailq fail for over 20 hours fi fi printf "%s\n" "$c" | ts "%F %T" >> $statefile diff --git a/conflink b/conflink index d3a7b2a..defb832 100755 --- a/conflink +++ b/conflink @@ -81,7 +81,10 @@ common-file-setup() { for dir in "$@"; do fs=$dir/filesystem if [[ -e $fs && $user =~ ^iank?$ ]]; then - cmd=( s rsync -aiSAX --chown=root:root --chmod=g-s + # we dont want t, instead c for checksum. + # That way we dont set times on directories. + # -a = -rlptgoD + cmd=( s rsync -rclpgoDiSAX --chown=root:root --chmod=g-s --exclude=/etc/dovecot/users --exclude='/etc/exim4/passwd*' --exclude='/etc/exim4/*.pem' @@ -92,7 +95,11 @@ common-file-setup() { case $file in etc/prometheus/rules/iank.yml) case $HOSTNAME in - kd) m s systemctl reload prometheus ;; + kd) + if systemctl is-active prometheus &>/dev/null; then + m s systemctl reload prometheus + fi + ;; esac ;; etc/systemd/system/*) @@ -115,7 +122,7 @@ common-file-setup() { done < <("${cmd[@]}") fi - if [[ -e $dir/subdir_files ]]; then + if ! 
$fast && [[ -e $dir/subdir_files ]]; then m subdir-link-r $dir/subdir_files fi local x=( $dir/!(binds|subdir_files|filesystem|machine_specific|..|.|.#*) ) @@ -185,7 +192,7 @@ case $user in for f in /etc/prometheus-{,export-}htpasswd; do if [[ -e $f ]]; then s chmod 640 $f - if getent passwd www-data; then + if getent passwd www-data &>/dev/null; then s chown root:www-data $f fi fi @@ -194,15 +201,15 @@ case $user in if [[ -e $f ]]; then # note: this is duplicative of the file's own permissions s chmod 640 $f /etc/prometheus-pass - if getent passwd prometheus; then + if getent passwd prometheus &>/dev/null; then s chown root:prometheus $f fi fi - - ##### end special extra stuff ##### - m sudo -H -u user2 "${BASH_SOURCE[0]}" + if ! $fast; then + m sudo -H -u user2 "${BASH_SOURCE[0]}" + fi f=/a/bin/distro-setup/system-status if [[ -x $f ]]; then diff --git a/distro-begin b/distro-begin index e969940..e2d1a87 100755 --- a/distro-begin +++ b/distro-begin @@ -78,7 +78,10 @@ fi ### arg parsing recompile=false -emacs=true +emacs=false +if [[ -e /a/opt/emacs ]]; then + emacs=true +fi while [[ $1 == -* ]]; do case $1 in -r) recompile=true; shift ;; @@ -97,7 +100,7 @@ source $script_dir/pkgs set +x source /a/bin/distro-functions/src/identify-distros $interactive || set -x -for f in kd x2 x3 frodo tp li bk je demohost kw; do +for f in kd x2 x3 frodo tp li bk je demohost kw sy bo; do eval "$f() { [[ $HOSTNAME == $f ]]; }" done codename=$(debian-codename) @@ -233,6 +236,10 @@ sudo sed -i --follow-symlinks -f - /etc/hosts < +AuthType Basic +AuthName "basic_auth" +# created with +# htpasswd -c prometheus-export-htpasswd USERNAME +AuthUserFile "/etc/prometheus-export-htpasswd" +Require valid-user + +EOF ;;& + bk) sgo wg-quick@wgmail @@ -1656,7 +1677,7 @@ m /a/bin/buildscripts/rust m /a/bin/buildscripts/misc m /a/bin/buildscripts/pithosfly #m /a/bin/buildscripts/alacritty -m /a/bin/buildscripts/kitty +#m /a/bin/buildscripts/kitty pi-nostart virtinst virt-manager soff libvirtd @@ 
-1832,7 +1853,10 @@ sudo debconf-set-selections < AuthType Basic @@ -1868,6 +1893,20 @@ AuthUserFile "/etc/prometheus-htpasswd" Require valid-user EOF + # by default, the alertmanager web ui is not enabled other than a page + # that suggests to use the amtool cli. that tool is good, but you cant + # silence things nearly as fast. + if [[ ! -e /usr/share/prometheus/alertmanager/ui/index.html ]]; then + sudo chroot /nocow/schroot/bullseye prometheus-alertmanager + sudo chroot /nocow/schroot/bullseye /usr/share/prometheus/alertmanager/generate-ui.sh + sudo rsync -avih /nocow/schroot/bullseye/usr/share/prometheus/alertmanager/ui/ /usr/share/prometheus/alertmanager/ui + ser restart prometheus-alertmanager + fi + + for ser in prometheus-node-exporter prometheus-alertmanager prometheus; do + sysd-prom-fail-install $ser + done + ;; *) pi prometheus-node-exporter @@ -1881,21 +1920,10 @@ case $HOSTNAME in # either use iptables or, in # /etc/default/prometheus-node-exporter # listen on the wireguard interface - li|je|bk) - # ex for exporter - web-conf -p 9101 -f 9100 - apache2 ${HOSTNAME}ex.b8.nz <<'EOF' - -AuthType Basic -AuthName "basic_auth" -# created with -# htpasswd -c prometheus-export-htpasswd USERNAME -AuthUserFile "/etc/prometheus-export-htpasswd" -Require valid-user - -EOF - ;; *) wgip=$(command sudo sed -rn 's,^ *Address *= *([^/]+).*,\1,p' /etc/wireguard/wghole.conf) + # old filename. remove once all hosts are updated. + s rm -fv /etc/apache2/sites-enabled/${HOSTNAME}wg.b8.nz.conf web-conf -i -a $wgip -p 9101 -f 9100 - apache2 ${HOSTNAME}wg.b8.nz <<'EOF' AuthType Basic @@ -1906,7 +1934,25 @@ AuthUserFile "/etc/prometheus-export-htpasswd" Require valid-user EOF - ;; + # For work, i think we will just use the firewall for hosts in the main data center, and + # apache/nginx + tls + basic auth outside of it. or consider stunnel. + + + # TODO: figure out how to detect the ping failure and try again. 
+ + # Binding to the wg interface, it might go down, so always restart, and wait for it on boot. + s mkdir /etc/systemd/system/apache2.service.d + sd /etc/systemd/system/apache2.service.d/restart.conf < 0 + labels: + severity: day + + + - alert: mailtest_check expr: |- time() - mailtest_check_last_usec > 60 * 12 labels: severity: day annotations: - description: '{{ $labels.instance }} mailtest-check' - summary: '{{ $labels.instance }} mailtest-check' + summary: '12 minutes down' # 42 mins: enough for a 30 min queue run plus 12 - - alert: mailtest-check + - alert: mailtest_check expr: |- time() - mailtest_check_last_usec > 60 * 42 labels: severity: prod annotations: - description: '{{ $labels.instance }} mailtest-check' - summary: '{{ $labels.instance }} mailtest-check' + summary: '43 minutes down' - alert: 1pmtest - expr: hour() == 18 and minute() < 5 + expr: hour() == 17 and minute() < 5 for: 0m labels: severity: daytest annotations: - summary: Prometheus daily test alert (instance {{ $labels.instance }}) - description: "Prometheus daily test alert if no other alerts. It - is an end to end test.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + summary: Prometheus daily test alert + + + +# alternate expression, to calculate if the alert would have fired is: +# min_over_time(sum_over_time(up[30m])[1d:]) == 0 +# where 30m matches the for: time in target_down +# +# sum_over_time is not needed, just convenience for graphing + - alert: target_down_inhibitor + expr: |- + sum_over_time(ALERTS{alertname="target_down"}[1d]) + labels: + severity: ignore + annotations: + summary: alert that indicates target_down alert fired in the last day + description: "VALUE = {{ $value }}" + +# For targets where we alert except for longer downtimes, we +# still want to know if it is going down many times for short times over +# a long period of time. But ignore reboots. 
+# +## Another way would be to detect an overall downtime: +# avg_over_time(node_systemd_unit_state{name="dynamicipupdate.service",state="active"}[1d]) < .95 + - alert: up_resets + expr: |- + resets(up[3d]) - changes(node_boot_time_seconds[3d]) > 15 + labels: + severity: warn + annotations: + summary: "Target has gone down {{ $value }} times in 3 days, > 15" + + # https://awesome-prometheus-alerts.grep.to/rules @@ -56,27 +151,30 @@ groups: severity: day annotations: summary: Prometheus job missing (instance {{ $labels.instance }}) - description: "A Prometheus job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "A Prometheus job has disappeared\n VALUE = {{ $value }}" - - alert: PrometheusTargetMissing +# TODO: some hosts, notably li and MAIL_HOST, we want to alert sooner than 30m, +# and severity to day. mail host is tricky since it roams, but I think the +# right way to do it is to check for absence of this metric: +# mailtest_check_last_usec{folder="/m/md/l/testignore",from="ian@iankelling.org"} + - alert: target_down expr: up == 0 for: 30m labels: severity: warn annotations: - summary: Prometheus target missing (instance {{ $labels.instance }}) - description: "A Prometheus target has disappeared. 
An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + summary: Target down for 30m - # todo: this should supress the above alert - # - alert: PrometheusAllTargetsMissing - # expr: count by (job) (up) == 0 - # for: 30m - # labels: - # severity: day - # alert-group: local-prom - # annotations: - # summary: Prometheus all targets missing (instance {{ $labels.instance }}) - # description: "A Prometheus job does not have living target anymore.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + # todo: this should group with the above alert + - alert: PrometheusAllTargetsMissing + expr: count by (job) (up) == 0 + for: 10m + labels: + severity: day +# alert-group: local-prom + annotations: + description: "A Prometheus job does not have living target anymore.\n VALUE = {{ $value }}" - alert: PrometheusConfigurationReloadFailure expr: prometheus_config_last_reload_successful != 1 @@ -84,20 +182,15 @@ groups: labels: severity: day annotations: - summary: Prometheus configuration reload failure (instance {{ $labels.instance }}) - description: "Prometheus configuration reload error\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - # I have an out of band alert to make sure prometheus is up. this - # looks like it would generate false positives. todo: think - # through what a valid crash loop detection would look like. - # - alert: PrometheusTooManyRestarts - # expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 10 - # for: 0m - # labels: - # severity: warning - # annotations: - # summary: Prometheus too many restarts (instance {{ $labels.instance }}) - # description: "Prometheus has restarted more than twice in the last 15 minutes. 
It might be crashlooping.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus configuration reload error\n VALUE = {{ $value }}" + + - alert: PrometheusTooManyRestarts + expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[30m]) > 10 + for: 0m + labels: + severity: warning + annotations: + description: "Prometheus has restarted more than ten times in the last 30 minutes. It might be crashlooping.\n VALUE = {{ $value }}" - alert: PrometheusAlertmanagerJobMissing expr: absent(up{job="alertmanager"}) @@ -105,8 +198,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus AlertManager job missing (instance {{ $labels.instance }}) - description: "A Prometheus AlertManager job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "A Prometheus AlertManager job has disappeared\n VALUE = {{ $value }}" - alert: PrometheusAlertmanagerConfigurationReloadFailure expr: alertmanager_config_last_reload_successful != 1 @@ -114,8 +206,7 @@ groups: labels: severity: day annotations: - summary: Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}) - description: "AlertManager configuration reload error\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "AlertManager configuration reload error\n VALUE = {{ $value }}" - alert: PrometheusNotConnectedToAlertmanager expr: prometheus_notifications_alertmanagers_discovered < 1 @@ -123,8 +214,7 @@ groups: labels: severity: day annotations: - summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }}) - description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}" - alert: PrometheusRuleEvaluationFailures expr: increase(prometheus_rule_evaluation_failures_total[3m]) > 0 @@ -132,8 +222,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus 
rule evaluation failures (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n VALUE = {{ $value }}" - alert: PrometheusTemplateTextExpansionFailures expr: increase(prometheus_template_text_expansion_failures_total[3m]) > 0 @@ -141,8 +230,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus template text expansion failures (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} template text expansion failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} template text expansion failures\n VALUE = {{ $value }}" - alert: PrometheusRuleEvaluationSlow expr: prometheus_rule_group_last_duration_seconds > prometheus_rule_group_interval_seconds @@ -150,8 +238,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus rule evaluation slow (instance {{ $labels.instance }}) - description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus rule evaluation took more time than the scheduled interval. 
It indicates a slower storage backend access or too complex query.\n VALUE = {{ $value }}" - alert: PrometheusNotificationsBacklog expr: min_over_time(prometheus_notifications_queue_length[30m]) > 0 @@ -159,8 +246,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus notifications backlog (instance {{ $labels.instance }}) - description: "The Prometheus notification queue has not been empty for 10 minutes\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "The Prometheus notification queue has not been empty for 10 minutes\n VALUE = {{ $value }}" - alert: PrometheusAlertmanagerNotificationFailing expr: rate(alertmanager_notifications_failed_total[1m]) > 0 @@ -168,8 +254,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus AlertManager notification failing (instance {{ $labels.instance }}) - description: "Alertmanager is failing sending notifications\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Alertmanager is failing sending notifications\n VALUE = {{ $value }}" # file_sd doesnt count as service discovery, so 0 is expected. 
# - alert: PrometheusTargetEmpty @@ -178,8 +263,7 @@ groups: # labels: # severity: day # annotations: - # summary: Prometheus target empty (instance {{ $labels.instance }}) - # description: "Prometheus has no target in service discovery\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + # description: "Prometheus has no target in service discovery\n VALUE = {{ $value }}" - alert: PrometheusTargetScrapingSlow expr: prometheus_target_interval_length_seconds{quantile="0.9"} > 90 @@ -187,8 +271,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus target scraping slow (instance {{ $labels.instance }}) - description: "Prometheus is scraping exporters slowly\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus is scraping exporters slowly\n VALUE = {{ $value }}" - alert: PrometheusLargeScrape expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total[10m]) > 10 @@ -196,8 +279,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus large scrape (instance {{ $labels.instance }}) - description: "Prometheus has many scrapes that exceed the sample limit\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus has many scrapes that exceed the sample limit\n VALUE = {{ $value }}" - alert: PrometheusTargetScrapeDuplicate expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total[5m]) > 0 @@ -205,8 +287,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus target scrape duplicate (instance {{ $labels.instance }}) - description: "Prometheus has many samples rejected due to duplicate timestamps but different values\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus has many samples rejected due to duplicate timestamps but different values\n VALUE = {{ $value }}" - alert: PrometheusTsdbCheckpointCreationFailures expr: increase(prometheus_tsdb_checkpoint_creations_failed_total[1m]) > 0 @@ -214,8 +295,7 @@ groups: labels: severity: warn 
annotations: - summary: Prometheus TSDB checkpoint creation failures (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} checkpoint creation failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} checkpoint creation failures\n VALUE = {{ $value }}" - alert: PrometheusTsdbCheckpointDeletionFailures expr: increase(prometheus_tsdb_checkpoint_deletions_failed_total[1m]) > 0 @@ -223,8 +303,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus TSDB checkpoint deletion failures (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} checkpoint deletion failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} checkpoint deletion failures\n VALUE = {{ $value }}" - alert: PrometheusTsdbCompactionsFailed expr: increase(prometheus_tsdb_compactions_failed_total[1m]) > 0 @@ -232,8 +311,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus TSDB compactions failed (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} TSDB compactions failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} TSDB compactions failures\n VALUE = {{ $value }}" - alert: PrometheusTsdbHeadTruncationsFailed expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) > 0 @@ -241,8 +319,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus TSDB head truncations failed (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} TSDB head truncation failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} TSDB head truncation failures\n VALUE = {{ $value }}" - alert: PrometheusTsdbReloadFailures expr: increase(prometheus_tsdb_reloads_failures_total[1m]) > 0 @@ -250,8 +327,7 @@ groups: labels: severity: warn annotations: - 
summary: Prometheus TSDB reload failures (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} TSDB reload failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} TSDB reload failures\n VALUE = {{ $value }}" - alert: PrometheusTsdbWalCorruptions expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0 @@ -259,8 +335,7 @@ groups: labels: severity: warn annotations: - summary: Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n VALUE = {{ $value }}" - alert: PrometheusTsdbWalTruncationsFailed expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) > 0 @@ -268,5 +343,4 @@ groups: labels: severity: warn annotations: - summary: Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}) - description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures\n VALUE = {{ $value }}" diff --git a/filesystem/etc/systemd/system/btrfsmaintstop.service b/filesystem/etc/systemd/system/btrfsmaintstop.service index 5e8160d..5ddec17 100644 --- a/filesystem/etc/systemd/system/btrfsmaintstop.service +++ b/filesystem/etc/systemd/system/btrfsmaintstop.service @@ -5,9 +5,9 @@ StartLimitIntervalSec=0 [Service] Type=simple -ExecStart=/usr/local/bin/sysd-mail-once -10 btrfsmaintstop /usr/local/bin/btrfsmaint check +ExecStart=/usr/local/bin/btrfsmaint check Restart=always RestartSec=600 [Install] -WantedBy=grahical.target +WantedBy=graphical.target diff --git a/filesystem/etc/systemd/system/dynamicipupdate.service b/filesystem/etc/systemd/system/dynamicipupdate.service index 54b04f9..302f7f4 100644 --- 
a/filesystem/etc/systemd/system/dynamicipupdate.service +++ b/filesystem/etc/systemd/system/dynamicipupdate.service @@ -5,9 +5,9 @@ StartLimitIntervalSec=0 [Service] Type=simple -ExecStart=/usr/local/bin/sysd-mail-once -40 dynamicipupdate /usr/local/bin/dynamic-ip-update +ExecStart=/usr/local/bin/dynamic-ip-update Restart=always RestartSec=600 [Install] -WantedBy=grahical.target +WantedBy=graphical.target diff --git a/filesystem/etc/systemd/system/epanicclean.service b/filesystem/etc/systemd/system/epanicclean.service index bc79520..5a0167e 100644 --- a/filesystem/etc/systemd/system/epanicclean.service +++ b/filesystem/etc/systemd/system/epanicclean.service @@ -10,4 +10,4 @@ Restart=always RestartSec=600 [Install] -WantedBy=grahical.target +WantedBy=graphical.target diff --git a/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf b/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf new file mode 100644 index 0000000..403672e --- /dev/null +++ b/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf @@ -0,0 +1,11 @@ +# apparently alertmanager fails to start when the internet is down: +# Mar 10 13:20:09 kd prometheus-alertmanager[2719]: level=error ts=2022-03-10T18:20:09.907Z caller=main.go:243 msg="unable to initialize gossip mesh" err="create memberlist: Failed to get final advertise address: No private IP address found, and explicit IP not provided" + +[Unit] +# needed to continually restart +StartLimitIntervalSec=0 + +[Service] +Restart=always +# time to sleep before restarting a service +RestartSec=300 diff --git a/filesystem/etc/systemd/system/prometheus.d/restart.conf b/filesystem/etc/systemd/system/prometheus.d/restart.conf new file mode 100644 index 0000000..aa2ea84 --- /dev/null +++ b/filesystem/etc/systemd/system/prometheus.d/restart.conf @@ -0,0 +1,8 @@ +[Unit] +# needed to continually restart +StartLimitIntervalSec=0 + +[Service] +Restart=always +# time to sleep before restarting a service +RestartSec=600
diff --git a/filesystem/etc/systemd/system/systemstatus.service b/filesystem/etc/systemd/system/systemstatus.service index e21a4a3..eb21661 100644 --- a/filesystem/etc/systemd/system/systemstatus.service +++ b/filesystem/etc/systemd/system/systemstatus.service @@ -6,7 +6,7 @@ After=local-fs.target [Service] Type=simple Environment=XDG_RUNTIME_DIR=/run/user/1000 -ExecStart=/usr/local/bin/sysd-mail-once -3 systemstatus /usr/local/bin/system-status +ExecStart=/usr/local/bin/system-status IOSchedulingClass=idle CPUSchedulingPolicy=idle User=iank @@ -16,4 +16,4 @@ RestartSec=600 [Install] -WantedBy=grahical.target +WantedBy=graphical.target diff --git a/filesystem/usr/local/bin/myterm b/filesystem/usr/local/bin/myterm new file mode 100755 index 0000000..0ccb39c --- /dev/null +++ b/filesystem/usr/local/bin/myterm @@ -0,0 +1,14 @@ +#!/bin/sh +# using /bin/sh for speed since the script is very simple. + +# these system76 systems have garbled display with konsole +# and some other apps like mumble. something about the intel +# graphics i think. +case $HOSTNAME in + sy|bo) + exec sakura "$@" + ;; + *) + exec konsole "$@" + ;; +esac diff --git a/filesystem/usr/local/bin/myupgrade b/filesystem/usr/local/bin/myupgrade index c206f8f..fb8d1d0 100755 --- a/filesystem/usr/local/bin/myupgrade +++ b/filesystem/usr/local/bin/myupgrade @@ -25,7 +25,7 @@ d() { if [[ $DEBUG ]]; then pee cat "wall -n" else - echo ok | sed 's/^/myupgrade /' | pee logger "wall -n" + sed 's/^/myupgrade /' | pee logger "wall -n" fi } myreboot() { @@ -52,8 +52,9 @@ myreboot() { # We should figure some workaround. if !
out=$(/sbin/needrestart -p 2>&1); then - printf "%s\n\n" "$out" - if [[ $HOSTNAME != "$MAIL_HOST" ]]; then + if [[ $HOSTNAME == "$MAIL_HOST" ]]; then + needrestart -r l + else myreboot fi fi diff --git a/i3-sway/common.conf b/i3-sway/common.conf index 0e49aec..29a25c4 100644 --- a/i3-sway/common.conf +++ b/i3-sway/common.conf @@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10 bindsym $mod+Shift+m border toggle bindsym $mod+j exec emacsclient -c -bindsym $mod+k exec kitty +bindsym $mod+k exec myterm bindsym $mod+l exec dmenu_run # note default is 27% on my system76. not sure if these # keybinds will screw up other laptop brightness keys. diff --git a/install-my-scripts b/install-my-scripts index cb54350..c6f960a 100755 --- a/install-my-scripts +++ b/install-my-scripts @@ -38,7 +38,6 @@ x="$(readlink -f -- "${BASH_SOURCE[0]}")"; cd ${x%/*} # directory of this file rsync -t --chmod=755 --chown=root:root switch-mail-host btrbk-run mount-latest-subvol \ check-subvol-stale myi3status mailtest-check \ mailbindwatchdog \ - /a/bin/log-quiet/sysd-mail-once \ check-mailq \ unsaved-buffers.el \ mail-backup-clean \ diff --git a/filesystem/etc/default/prometheus-node-exporter b/machine_specific/kd/filesystem/etc/default/prometheus-node-exporter similarity index 100% rename from filesystem/etc/default/prometheus-node-exporter rename to machine_specific/kd/filesystem/etc/default/prometheus-node-exporter diff --git a/mail-setup b/mail-setup index 71b086b..0a41df0 100755 --- a/mail-setup +++ b/mail-setup @@ -660,6 +660,7 @@ fi case $HOSTNAME in $MAIL_HOST) + # todo, should this be after vpn service i /etc/systemd/system/unbound.service.d/nn.conf <>/etc/cron.d/mailtest </dev/null; then if [[ $HOSTNAME == "$MAIL_HOST" ]]; then diff --git a/rootsshsync b/rootsshsync index fa36a56..900a662 100755 --- a/rootsshsync +++ b/rootsshsync @@ -59,7 +59,7 @@ if [[ -e $user_ssh_dir/config ]]; then fi chown -R root:root /root/.ssh -rsync -t --chmod=755 --chown=root:root /b/ds/hssh /usr/local/bin +rsync 
-t --chmod=755 --chown=root:root /a/bin/ds/hssh /usr/local/bin if [[ -e /a/opt/btrbk/ssh_filter_btrbk.sh ]]; then install /a/opt/btrbk/ssh_filter_btrbk.sh /usr/local/bin diff --git a/subdir_files/.config/i3/config b/subdir_files/.config/i3/config index b126030..01b5c2c 100644 --- a/subdir_files/.config/i3/config +++ b/subdir_files/.config/i3/config @@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10 bindsym $mod+Shift+m border toggle bindsym $mod+j exec emacsclient -c -bindsym $mod+k exec kitty +bindsym $mod+k exec myterm bindsym $mod+l exec dmenu_run # note default is 27% on my system76. not sure if these # keybinds will screw up other laptop brightness keys. diff --git a/subdir_files/.config/sakura/sakura.conf b/subdir_files/.config/sakura/sakura.conf index 3081b59..8854f22 100644 --- a/subdir_files/.config/sakura/sakura.conf +++ b/subdir_files/.config/sakura/sakura.conf @@ -65,3 +65,5 @@ icon_file=terminal-tango.svg use_fading=false scrollable_tabs=true word_chars=-,./?%&#_~: +search_accelerator=5 +search_key=F diff --git a/subdir_files/.config/sway/config b/subdir_files/.config/sway/config index f96355a..ba675c2 100644 --- a/subdir_files/.config/sway/config +++ b/subdir_files/.config/sway/config @@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10 bindsym $mod+Shift+m border toggle bindsym $mod+j exec emacsclient -c -bindsym $mod+k exec kitty +bindsym $mod+k exec myterm bindsym $mod+l exec dmenu_run # note default is 27% on my system76. not sure if these # keybinds will screw up other laptop brightness keys. 
diff --git a/system-status b/system-status index 07c730d..f50d238 100755 --- a/system-status +++ b/system-status @@ -30,27 +30,38 @@ v() { } # log-once COUNT NAME [MESSAGE] lo() { - /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost + if type -p ifne &>/dev/null; then + /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost + fi } loday() { - /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylerts@iankelling.org + if type -p ifne &>/dev/null; then + /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylert@iankelling.org + fi } # todo, consider migrating some of these alerts into prometheus write-status() { chars=("${first_chars[@]}") - services=( - epanicclean - systemstatus - btrfsmaintstop - dynamicipupdate - ) + + services=( epanicclean ) + case $HOSTNAME in + bk|je|li) : ;; + *) + services+=( + systemstatus + btrfsmaintstop + dynamicipupdate + ) + ;; + esac + bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then + if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do - if [[ $(systemctl show -p SubState --value $s) != running ]]; then + if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then bads+=($s) fi done @@ -67,9 +78,9 @@ write-status() { case $HOSTNAME in kd) bads=() - if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then + if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then for s in ${services[@]}; do - if [[ $(systemctl show -p SubState --value $s) != running ]]; then + if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then bads+=($s) fi done @@ -143,7 +154,7 @@ write-status() { chars+=(A) fi - glob=(/m/md/daylerts/{new,cur}/!(*,S)) + glob=(/m/md/daylert/{new,cur}/!(*,S)) if [[ -e ${glob[0]} ]]; then chars+=(DAY) 
fi @@ -175,12 +186,13 @@ write-status() { esac begin=false - if ! make -C /b/ds -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then + + if ! make -C /b/ds -q ~/.local/distro-begin 2>/dev/null || [[ $(<~/.local/distro-begin) != 0 ]]; then begin=true fi end=false - if ! make -C /b/ds -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then + if ! make -C /b/ds -q ~/.local/distro-end 2>/dev/null || [[ $(<~/.local/distro-end) != 0 ]]; then end=true fi @@ -261,7 +273,10 @@ write-status() { # leave it up to epanic-clean to send email notification fi - source /a/bin/bash_unpublished/source-state + if [[ -e /a/bin/bash_unpublished/source-state ]]; then + # /a gets remounted due to btrbk, ignore the error code for a file that doesn't exist + source /a/bin/bash_unpublished/source-state || [[ $? == 1 ]] + fi if [[ $MAIL_HOST == "$HOSTNAME" ]]; then bbkmsg= if [[ $(systemctl is-active btrbk.timer) != active ]]; then @@ -304,10 +319,14 @@ write-status() { lo -1 old-snapshot $snapshotmsg fi - cat /a/bin/bash_unpublished/source-state >$status_file + if [[ ! -e $status_file || -w $status_file ]]; then + if [[ -e /a/bin/bash_unpublished/source-state ]]; then + cat /a/bin/bash_unpublished/source-state >$status_file + fi - if [[ ${chars[*]} ]]; then - echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file + if [[ ${chars[*]} ]]; then + echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file + fi fi } -- 2.30.2