fixes, prometheus, lots of stuff

author Ian Kelling <ian@iankelling.org>

Fri, 25 Mar 2022 06:25:24 +0000 (02:25 -0400)

committer Ian Kelling <ian@iankelling.org>

Fri, 25 Mar 2022 06:25:24 +0000 (02:25 -0400)
author Ian Kelling <ian@iankelling.org>
Fri, 25 Mar 2022 06:25:24 +0000 (02:25 -0400)
committer Ian Kelling <ian@iankelling.org>
Fri, 25 Mar 2022 06:25:24 +0000 (02:25 -0400)
diff --git a/bitfolk-chroot-install b/bitfolk-chroot-install

new file mode 100755 (executable)

index 0000000..37e77b7
--- /dev/null
+++ b/bitfolk-chroot-install
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+if ! test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi
+shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4
+set -eE -o pipefail
+trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" exit status: $?, PIPESTATUS: ${PIPESTATUS[*]}" >&2' ERR
+
+
+host=$1 # je or bk: selects the addresses written to /etc/network/interfaces
+case $host in
+  je)
+    ip6=2001:ba8:1f1:f09d
+    ip4=85.119.82.128
+    ;;
+  bk)
+    ip6=2001:ba8:1f1:f0c9
+    ip4=85.119.83.50
+    ;;
+  *) echo "$0: error: expected host arg je or bk, got: \"$host\"" >&2; exit 1 ;;
+esac
+
+debconf-set-selections <<'EOF'
+locales locales/default_environment_locale select en_US.UTF-8
+locales locales/locales_to_be_generated multiselect en_US.UTF-8 UTF-8
+EOF
+
+# /a/bin/fai/fai/config/hooks/updatebase.UBUNTU
+debconf --owner=locales sh -c '
+   . /usr/share/debconf/confmodule
+   db_version 2.0
+   db_get locales/locales_to_be_generated &&
+   mkdir -p /var/lib/locales/supported.d &&
+   echo "$RET" > /var/lib/locales/supported.d/local'
+dpkg-reconfigure -fnoninteractive locales
+
+apt -y remove --purge --auto-remove netplan.io libnetplan0
+apt update
+apt -y install linux-virtual-hwe-20.04 grub-pc-bin openssh-server ifupdown rsync
+mkdir -p /root/.ssh
+chmod 700 /root/.ssh
+cat >/root/.ssh/authorized_keys <<'EOF'
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDX42yru/h6r6UDRy/VwVZjcYEmNLG5/SUjv7xwu43OaW0wL+uHYg2rkfn4Ygh5o1I5pgBh2SWg8TeWuVGhgL1SCuBzzeai/+58Sny065Qak+D2WjVOuRonRelh+CBA5EpNZPuiWQkoWdf9NACTBCbS2Zu7r8OOgRqu/ruaDNePlG5+U0Wlpy3oBnpbzQiuSA3AKMW30fsCJtOBjz5qQaiPbYEKJy3AOvtbq10wliKx9TpsTzrq8dKWs7PLhZnzqVCsaq6D95IzjqXcSpx4Cga5bn+YEuAnJQ53PGA5eO+hpz6HDmawTbJlaV/Dufb9bJ/ZZy1DXzs07yWRtTEY54/X ian@iankelling.org
+EOF
+
+
+# todo update this and hostname depending on host. note: \$IFACE is escaped so ifupdown expands it, not this script.
+cat >/etc/network/interfaces <<EOF
+auto lo
+iface lo inet loopback
+
+# The primary network interface
+auto eth0
+iface eth0 inet static
+  address $ip4/21
+  gateway 85.119.80.1
+
+iface eth0 inet6 static
+    address $ip6::2
+    netmask 64
+    gateway $ip6::1
+    post-up echo 0 > /proc/sys/net/ipv6/conf/default/accept_ra
+    post-up echo 0 > /proc/sys/net/ipv6/conf/all/accept_ra
+    post-up echo 0 > /proc/sys/net/ipv6/conf/\$IFACE/accept_ra
+    post-up echo 0 > /proc/sys/net/ipv6/conf/default/autoconf
+    post-up echo 0 > /proc/sys/net/ipv6/conf/all/autoconf
+    post-up echo 0 > /proc/sys/net/ipv6/conf/\$IFACE/autoconf
+EOF
+
+cat >/etc/fstab <<'EOF'
+/dev/xvda1 / ext4 noatime,nodiratime 0 1
+/dev/xvdb1 none swap nofail,x-systemd.device-timeout=30s,x-systemd.mount-timeout=30s,sw  0 0
+EOF
+
+cat >> /etc/default/grub <<'EOF'
+GRUB_CMDLINE_LINUX_DEFAULT=""
+GRUB_CMDLINE_LINUX="console=hvc0"
+EOF
+
+update-grub
+mkdir -p /etc/systemd/resolved.conf.d # the drop-in dir may not exist yet in a fresh bootstrap
+cat >/etc/systemd/resolved.conf.d/servers.conf <<'EOF'
+[Resolve]
+DNS=85.119.80.232 85.119.80.233
+Domains=~.
+EOF
+
+cat >/etc/hostname <<EOF
+$host
+EOF
diff --git a/bitfolk-rescue-init b/bitfolk-rescue-init

new file mode 100644 (file)

index 0000000..9ff0a30
--- /dev/null
+++ b/bitfolk-rescue-init
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Not meant to be run as a script: copy/paste these commands step by step.
+
+ssh bk@bk.console.bitfolk.com
+# or
+ssh iankelling@iankelling.console.bitfolk.com
+
+destroy
+rescue
+sudo -i
+mkdir -p /root/.ssh
+chmod 700 /root/.ssh
+cat >/root/.ssh/authorized_keys <<'EOF'
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDX42yru/h6r6UDRy/VwVZjcYEmNLG5/SUjv7xwu43OaW0wL+uHYg2rkfn4Ygh5o1I5pgBh2SWg8TeWuVGhgL1SCuBzzeai/+58Sny065Qak+D2WjVOuRonRelh+CBA5EpNZPuiWQkoWdf9NACTBCbS2Zu7r8OOgRqu/ruaDNePlG5+U0Wlpy3oBnpbzQiuSA3AKMW30fsCJtOBjz5qQaiPbYEKJy3AOvtbq10wliKx9TpsTzrq8dKWs7PLhZnzqVCsaq6D95IzjqXcSpx4Cga5bn+YEuAnJQ53PGA5eO+hpz6HDmawTbJlaV/Dufb9bJ/ZZy1DXzs07yWRtTEY54/X ian@iankelling.org
+EOF
+apt update
+apt -y install openssh-server
+
+
+##### in another terminal ######
+
+host=je
+scp /b/ds/bitfolk* root@$host.b8.nz:
+ssh root@$host ./bitfolk-rescue-install $host
+
+### back to the 1st terminal
+
+poweroff
+boot
+
+# press ctrl ]
+exit
+
+jepush
+# todo: let's copy the host keys around so we don't have to do this.
+khfix je
+sl root@je /a/bin/ds/distro-begin
+
+# todo, fix it so i can ssh to
+sl je /a/bin/ds/distro-begin
+sl je /a/bin/ds/distro-end
diff --git a/bitfolk-rescue-install b/bitfolk-rescue-install

new file mode 100755 (executable)

index 0000000..3270a3b
--- /dev/null
+++ b/bitfolk-rescue-install
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+# assumes we've partitioned /dev/xvda1
+
+if ! test "$BASH_VERSION"; then echo "error: shell is not bash" >&2; exit 1; fi
+shopt -s inherit_errexit 2>/dev/null ||: # ignore fail in bash < 4.4
+set -eE -o pipefail
+trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" exit status: $?, PIPESTATUS: ${PIPESTATUS[*]}" >&2' ERR
+
+# the chroot install needs HOST; check it before wiping the disk
+if [[ ! $1 ]]; then echo "$0: error: expected HOST argument" >&2; exit 1; fi
+# already did this to ssh in
+#apt update
+apt install -y mmdebstrap
+
+wipefs -a /dev/xvda1
+mkfs.ext4 /dev/xvda1
+
+#apt-key export B138CA450C05112F
+cat  >trisquel.key <<'EOF'
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQINBFhxcQMBEADaT9jTxXNbmjx7kZdTK7JCFx2OAeSS0+XltJnGOPnd6Vj4W1u2
+QLReYD1rVVYA8kneT3VjvZpKO/Ho3TbQZ4E8hNDEwaVmKnTfrmptIzv44cHJexsZ
+eTol74rac/pC/oXCHGINWLflfyQt8iB0dGSEPjtDXvYNFTwBYrQDI9apO5JVWvCB
+qLaQdXU+MCsTaD5OZ4bJ2dQleI34UGea+NrrnAZP68d5hsLL+WTa65fhB3Bws8zc
+1v+JhVJhLYNQOcTHRXU7ieUN2zy+JzbD/10XV64YZQseEkhXG8LMRvfbTDD/SigD
+wKViVFkDa7NFfmpOE9kaF6Nh/XrWmMJjG49KtYUln/G2XCV4TpJrbrWW0OxGqki+
+HH/36N63CZR4lj0EENFQduikTq1LeyQBXQSccwH9FJEI/4Uz5uuVM+tviScmgWEC
+YXo7PdCoUUJDiR6Uma4COAYOTHM/7C5wVofkH1mq5fSz6rbBNIDIoy/W6GrN41s0
+WOl+z9ZgNlY1qtvpwSDenY9WERLajUAJKVTAwI2mvIETo+q8b2J8iIlfI8KkW/+q
+qt87BkFmo2Acgh9l0O1j/Ysp5p7OtCjz8uuO+WzbBo6RlXafx/9e6QoatbtwSBUp
+4W3w2/VANRYtL5DyDCbcuwMk77r9jXp1913sTFlin4xIs840gcVahetA+QARAQAB
+tDFUcmlzcXVlbCBHTlUvTGludXggPHRyaXNxdWVsLWRldmVsQHRyaXNxdWVsLmlu
+Zm8+iQI2BBMBCgAhBQJYcXEDAhsDBQsJCAcDBRUKCQgLBRYDAgEAAh4BAheAAAoJ
+ELE4ykUMBREv6NUP9jTl1CZKHqL3NmF2Df/ID+za7YO5IebxbzRC9vkjrWSuoMpw
+xJ/U5pBCsz0bDewJXMx0XeSNgo/WAzWoPmh3PTUXKhLjlGS2DII96XXbMy2zi+3r
++apIY3wedYkC/BiK9w8pGXGGlTXTo9zitWZC2/yWC4I9W2818mMJPXpQjvkzqdwU
+UlV54fpnqoMNsLFEa1w1ahDerdCTe0Azrr+3YrKaQ287MpkWwO/Cf/yYg0UhbDih
+FFMZ4Wa9aywvxQ86khghOafLLkIHcakMBdVRodym00bGeAjeNHnMffDi3k8tfejk
+g6iLVrZf21+KsVfV+PLX4QQsPCR/dlneKKCAEPh+awserncssizx2/ujhvTd7z3l
+tXGG7UcQP7fYTBWNkU7+ddMOWp26hOsINt0NyxhiGT2ZPEy1vpZ1H80rlaMkiISH
+Z56SCfcHGuEHlkDKdz7ZsS8gU+zqMAnNEDb6UrqZZbhJcR3N7DsTQC/okoF8egDM
+nHD9pUdDakPungnt6j7eLA6Ogca58mbIvwsQ+Qn9Urcd0m30to6WCTlj4jKsrMy/
+QtbyVSc/G3PZXVqP8xWIpuZtu0eMx+NjCKWmOYvTgIjbjLp0z801Weexn9uva+1z
+5nRy+00aOddoLhtXqNFxNS94gXvH3D6ZJ2ejADooEBiqk1M+KWFtOsW01QG5Ag0E
+WHFxAwEQALAKjsSSREoYjswMG1/znBkoNz199DkKJ3DnOk5NulkZcAoHeQVLnv2M
+/1qycG4ndoIkINdz37fKdFziEJd4cBSQ+3gNollaxM2x7KdF7M13Z4YgFgg40qxO
+8id8CSLga2klnFU8aa5PtRPYd4XZ5azpxzCRF8u+1ojM+rLAO0hKLGDhBqjKFvG4
+ASeX14F1R2yiGvZU2lQKQu2ZIk9IqN7M0IsCEh1O8+GNd7lCTFyvAYK0ai0dg9Q5
+F6X2YvQVYDik7rOuP6D6oUmGXufi2vc0OxFX5dBHa4z7XR0BRzg9VtkUerHSbVPI
+c+3mgG5+QmlD+3NKYqiTZvKOWQbgKD/Pg0E0hqw8IjSThmge8XQcTh6qhW8ww0Gz
+ha6HN0At5kMGbQqsTARjfgjhJZdyjA09NGYu1KVKDrKMrN9le9tO05ztZeP3y5My
+S9LaWDE6Flm0BBqkkrHDk+9ID/qDixe/3ZCppu3dJsCF8aaG+sIQjxlMAeXtKOLl
+ZuQbPaVJbQXElwZo3Nz20N2RAZJLXycCev7EbC8Afpg6TYjlJyJX9uyKxDv+QORG
+RJ5vFA4evNCmUrS0PpcodJxk5TMSuR9vRuvT1jVVMe18T1F74XRqTW6xizC7EEM8
+X5QLDuVMRErSUPfcNYLTGJAvPTQ/EgU5aK+H1qv1EEbXeMiuksdvABEBAAGJAh8E
+GAEKAAkFAlhxcQMCGwwACgkQsTjKRQwFES/C0A//aT9JDbwF4JYgyxQuPuxb8G/e
+9thHNBhPmGL7gpyGzUW3q/c6HHnFxT7YPA37fsN/JD9Mcdx2rRFhz0XVR6cfdQZy
+299s2/aX4Tu4FbMnmM+Du5uFFgStJA7LjaacHn6MxEohUeZAL7LMYiUovbwnsaiP
+0sPhLaMrOQkRL/9mEKJiNbn6r/xX4xegzYNqoNdDKbcARaAzm5AH03Mmbc7Ss+OZ
+4v/7vlcUnyEZ2c4jazP7W+pGWIw9f3SqnIxuCeDrCD35IFsUrE27dbtaNpkKw9zF
+lfaEC+6PAI7M78gg2RNvaurCJR5B7bENrobf0lxbYGLGFcOIqTXkbuWjjO3eI/5Q
+rmnO8Uy41Zos03Gsa6QkQ4p6OtVN4hHLxXkirs31cIocPqiJ7Vi+OH8stMNukvVT
+dgnuw4dbPEhDnrFREDNSuRtV+2Lxl4JLr7gQUQDZKEf8cYZUAdN69dcW48Ugdvgu
+6cRDVWakfim6kvZiQ0vxGxGM02V3RdhhZqrwXXYUPyyWMW230IjYc9cYQ+3C/1K4
+MUUeMjKDMPQ/jlUiMjZeE+X0W/TaUj8uCOJ4M6+oYMqwUECPSFe9Of7VTKhB3+Ex
+wGEtYWJUfhuYu8Tph2GZmud0vz4+ugpkliFVliGJfPPJ1EfgAAiUUvomoIXKsynV
+McDbwCjFQn2iazszZsg=
+=UAIm
+-----END PGP PUBLIC KEY BLOCK-----
+EOF
+
+apt-key add trisquel.key
+
+mount /dev/xvda1 /mnt
+
+mmdebstrap nabia /mnt - <<'EOF'
+deb http://archive.trisquel.org/trisquel/ nabia main
+deb-src http://archive.trisquel.org/trisquel/ nabia main
+
+deb http://archive.trisquel.org/trisquel/ nabia-updates main
+deb-src http://archive.trisquel.org/trisquel/ nabia-updates main
+
+deb http://archive.trisquel.info/trisquel/ nabia-security main
+deb-src http://archive.trisquel.info/trisquel/ nabia-security main
+
+deb http://archive.trisquel.org/trisquel/ nabia-backports main
+deb-src http://archive.trisquel.org/trisquel/ nabia-backports main
+EOF
+
+cd /mnt
+
+# Bind mount /dev /proc /sys /dev/pts onto the same relative paths under the
+# current directory, leaving alone any that are already mountpoints.
+chrbind() {
+  local sub
+  # dev/pts needed for pacman signature check
+  for sub in dev proc sys dev/pts; do
+    [[ -d $sub ]] # with set -e, a missing directory aborts here
+    mountpoint $sub &>/dev/null || mount -o bind /$sub $sub
+  done
+}
+chrbind
+
+host=$1
+cp /root/bitfolk-chroot-install /mnt
+chroot . /bitfolk-chroot-install $host
+
+poweroff
+
+boot
diff --git a/bk-backup b/bk-backup

index 3621e76474ce2c4b6be7827ac6fee82abddc5e07..cc25cecd3ac1bdc233614459b9f3c10f3bc2a9a0 100755 (executable)
--- a/bk-backup
+++ b/bk-backup
@@ -37,14 +37,17 @@ ret=0
  if [[ $HOSTNAME == $MAIL_HOST ]]; then
    mkdir -p /p/bkbackup
    for ncdir in /var/www/ncexpertpath /var/www/ncninja; do
+    if [[ ! -d $ncdir ]]; then
+      continue
+    fi
      ncbase=${ncdir##*/}
      mkdir -p /p/bkbackup/$ncbase
      ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --on
      rsync -ra --exclude=testignore --delete root@$host:$ncdir/{config,data,themes} /p/bkbackup/$ncbase || ret=$?
      ssh root@$host sudo -u www-data php $ncdir/occ -q maintenance:mode --off
      if (( ret )); then
-       echo "$0: error: failed rsync $ncdir"
-       ret=1
+      echo "$0: error: failed rsync $ncdir"
+      ret=1
      fi
    done
    rsync -ra --delete root@$host:/m /p/bkbackup
diff --git a/brc b/brc

index 00f217210737514dc4f401738997cf6c1992d572..99b5c03954072adb91277b57aad18d7b6a44c40b 100644 (file)
--- a/brc
+++ b/brc
@@ -397,6 +397,24 @@ b() {
    c -
  }
  
+# vp9 IN [OUT]: two-pass libvpx-vp9 encode of IN, scaled to 720 pixels high at
+# 750K video bitrate, with vorbis audio. IN and OUT are relative to $PWD; OUT
+# defaults to vp9/IN. Paths are quoted so names with spaces work.
+vp9() {
+  local src dst
+  src="$PWD/$1"
+  dst="$PWD/${2:-vp9/$1}"
+  # work in a new temp dir, presumably so the pass log files land there
+  cd "$(mktemp -d)"
+  pwd
+  ffmpeg -threads 0 -i "$src" -g 192 -vcodec libvpx-vp9 -vf scale=-1:720 -max_muxing_queue_size 9999 -b:v 750K -pass 1 -an -f null /dev/null && \
+    ffmpeg -y -threads 0 -i "$src" -g 192 -vcodec libvpx-vp9 -vf scale=-1:720 -max_muxing_queue_size 9999 -b:v 750K -pass 2 -c:a libvorbis -qscale:a 5 "$dst"
+  cd -
+}
+
+utcl() { # utc hour to local hour, 24 hour time. whole-hour offsets only; zeros stripped since python3 rejects 01
+  echo "print( ($1  $(date +%z | sed -r 's/..$//;s/^([-+])0*([0-9])/\1\2/')) % 24)"|python3
+}
  
  # c. better cd
  if type -p wcd &>/dev/null; then
diff --git a/brc2 b/brc2

index 6b977839c870f2a4d37c0ae8bc3a3cc31aebe250..c233c9288354125d0d679f04deee6ee862666188 100644 (file)
--- a/brc2
+++ b/brc2
@@ -502,24 +502,23 @@ EOF
    done
  }
  bindpushb8() {
-  dsign iankelling.org expertpathologyreview.com zroe.org amnimal.ninja
    lipush
    for h in li bk; do
      m sl $h <<'EOF'
  source ~/.bashrc
-m dnsup
  m dnsb8
  EOF
    done
  }
  
  dnsup() {
-  conflink
+  conflink -f
    m ser reload bind9
  }
  dnsb8() {
    local f=/var/lib/bind/db.b8.nz
    ser stop bind9
+  sleep 1
    sudo rm -fv $f.jnl
    sudo install -m 644 -o bind -g bind /p/c/machine_specific/vps/bind-initial/db.b8.nz $f
    ser restart bind9
@@ -1075,8 +1074,8 @@ Address = 10.8.0.$ipsuf/24
  PostUp = ping -c1 10.8.0.1 ||:
  
  [Peer]
-# li
-PublicKey = zePGl7LoS3iv6ziTI/k8BMh4L3iL3K2t9xJheMR4hQA=
+# li. called wgmail on that server
+PublicKey = CTFsje45qLAU44AbX71Vo+xFJ6rt7Cu6+vdMGyWjBjU=
  AllowedIPs = 10.8.0.0/24
  Endpoint = 72.14.176.105:1194
  PersistentKeepalive = 25
@@ -1084,7 +1083,7 @@ EOF
    umask $umask_orig
    # old approach. systemd seems to work fine and cleaner.
    rm -f ../network/interfaces.d/wghole
-  cedit -q $host /p/c/machine_specific/li/filesystem/etc/wireguard/wghole.conf <<EOF || [[ $? == 1 ]]
+  cedit -q $host /p/c/machine_specific/li/filesystem/etc/wireguard/wgmail.conf <<EOF || [[ $? == 1 ]]
  [Peer]
  PublicKey = $(cat hole-pub.key)
  AllowedIPs = 10.8.0.$ipsuf/32
@@ -1524,14 +1523,14 @@ daylertme() {
    if [[ -t 0 ]]; then
      exim -t <<EOF
  From: alertme@b8.nz
-To: daylerts@iankelling.org
+To: daylert@iankelling.org
  Subject: $*
  EOF
    else
      read sub
      { cat <<EOF
  From: alertme@b8.nz
-To: daylerts@iankelling.org
+To: daylert@iankelling.org
  Subject: $sub
  
  EOF
@@ -1549,7 +1548,7 @@ alert200() {
      if torsocks wget -q "$url"; then
        alertme $tmpdir
      fi
-    sleep 600 + $(( RANDOM % 300 ))
+    sleep $(( 600 + RANDOM % 300 ))
    done
  }
  
@@ -1742,6 +1741,11 @@ vpn() {
    sudo systemd-tty-ask-password-agent
  }
  
+ufix() { # show, then reset, the mode and owner of /run/user/1000
+  ls -lad /run/user/1000
+  s chmod 700 /run/user/1000; s chown iank:iank /run/user/1000
+}
+
  # systemctl is-enabled / status / cat says nothing, instead theres
  # some obscure symlink. paths copied from man systemd.unit.
  # possibly also usefull, but incomplete, doesnt show units not loaded in memory:
diff --git a/btrfsmaint b/btrfsmaint

index 6c7dbb1f6788119cafa9cac1dc2cae559cd2c07b..1639a8dd48f450e69fdf67558a4780059583ec1d 100755 (executable)
--- a/btrfsmaint
+++ b/btrfsmaint
@@ -15,7 +15,12 @@ source /a/bin/errhandle/err
  dusage="5 10"
  musage="5"
  
-e() { echo "cron: $*"; "$@"; }
+# Log a command line with a "cron: " prefix, then execute it.
+# When $dryrun is true the command is only logged, not run.
+e() {
+  echo "cron: $*"
+  $dryrun || "$@"
+}
  
  check-idle() {
    type -p xprintidle &>/dev/null || return 0
@@ -39,7 +44,7 @@ check-idle() {
  
  usage() {
    cat <<EOF
-Usage: ${0##*/} args
+Usage: ${0##*/} [ARGS]
  Do btrfs maintence or stop if xprintidle shows a user
  
  force  Run regardless of user idle status
@@ -56,6 +61,7 @@ EOF
  
  force=false
  check=false
+dryrun=false
  if [[ $1 ]]; then
    case $1 in
      check)
@@ -64,6 +70,9 @@ if [[ $1 ]]; then
      force)
        force=true
        ;;
+    dryrun)
+      dryrun=true
+      ;;
      *)
        echo "$0: error: unexpected arg" >&2
        usage 1
@@ -93,8 +102,12 @@ main() {
      fi
  
      if ! $idle; then
-      btrfs scrub cancel $mnt &>/dev/null ||:
-      continue
+      if $dryrun; then
+        echo "$0: not idle. if this wasnt a dry run, btrfs scrub cancel $mnt"
+      else
+        btrfs scrub cancel $mnt &>/dev/null ||:
+        continue
+      fi
      fi
      if $check; then
        continue
@@ -118,20 +131,31 @@ main() {
          e ionice -c 3 btrfs balance start -musage=$usage $mnt
        done
      fi
-    # e btrfs filesystem df $mnt
-    # e df -H $mnt
-    date=$(
-      btrfs scrub status $mnt | \
-        sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p'
-        )
+    date=
+    scrub_status=$(btrfs scrub status $mnt)
+    if printf "%s\n" "$scrub_status" | grep -i '^status:[[:space:]]*finished$' &>/dev/null; then
+      date=$(printf "%s\n" "$scrub_status" | sed -rn 's/^Scrub started:[[:space:]]*(.*)/\1/p')
+    fi
+    if [[ ! $date ]]; then
+      # output from older versions, at least btrfs v4.15.1
+      date=$(
+        printf "%s\n" "$scrub_status" | \
+          sed -rn 's/^\s*scrub started at (.*) and finished.*/\1/p'
+          )
+    fi
      if [[ $date ]]; then
+      if $dryrun; then
+        echo "$0: last scrub finish for $mnt: $date"
+      fi
        date=$(date --date="$date" +%s)
        # if date is sooner than 90 days ago
        # the wiki recommends 30 days or so, but
        # it makes the comp lag like shit for a day,
        # so I'm going with 90 days.
        if (( date > $(date +%s) - 60*60*24*30 )); then
-        echo "cron: skiping scrub of $mnt"
+        if $dryrun; then
+          echo "$0: skiping scrub of $mnt, last was $(( ($(date +%s) - date) / 60/60/24 )) days ago, < 30 days"
+        fi
          continue
        fi
      fi
diff --git a/check-remote-mailqs b/check-remote-mailqs

index 3caaa61be67db995ba291b24038937cd299e7c8b..6e9cf7c2cebbd1cfde4df4fe0d971dd9b1802055 100755 (executable)
--- a/check-remote-mailqs
+++ b/check-remote-mailqs
@@ -27,8 +27,8 @@ for h in bk je li frodo kwwg x3wg x2wg kdwg sywg; do
      if [[ -s $statefile ]]; then
        logsec=$(date +%s -d "$(head -n1 $statefile | awk '{print $1,$2}')")
        nowsec=$(date +%s)
-      if (( logsec < nowsec - 60*60*48 )); then
-        echo $0: host $h ssh /usr/local/bin/check-mailq fail for over 48 hours
+      if (( logsec < nowsec - 60*60*20 )); then
+        echo $0: host $h ssh /usr/local/bin/check-mailq fail for over 20 hours
        fi
      fi
      printf "%s\n" "$c" | ts "%F %T" >> $statefile
diff --git a/conflink b/conflink

index d3a7b2a7663c09dbbf6cbee679a75790383c8dc6..defb8321d87e1fef55112a6c11e8f28f8e7ff52e 100755 (executable)
--- a/conflink
+++ b/conflink
@@ -81,7 +81,10 @@ common-file-setup() {
    for dir in "$@"; do
      fs=$dir/filesystem
      if [[ -e $fs && $user =~ ^iank?$ ]]; then
-      cmd=( s rsync -aiSAX --chown=root:root --chmod=g-s
+      # we dont want t, instead c for checksum.
+      # That way we dont set times on directories.
+      # -a = -rlptgoD
+      cmd=( s rsync -rclpgoDiSAX --chown=root:root --chmod=g-s
              --exclude=/etc/dovecot/users
              --exclude='/etc/exim4/passwd*'
              --exclude='/etc/exim4/*.pem'
@@ -92,7 +95,11 @@ common-file-setup() {
          case $file in
            etc/prometheus/rules/iank.yml)
              case $HOSTNAME in
-              kd) m s systemctl reload prometheus ;;
+              kd)
+                if systemctl is-active prometheus &>/dev/null; then
+                  m s systemctl reload prometheus
+                fi
+                ;;
              esac
              ;;
            etc/systemd/system/*)
@@ -115,7 +122,7 @@ common-file-setup() {
        done < <("${cmd[@]}")
      fi
  
-    if [[ -e $dir/subdir_files ]]; then
+    if ! $fast && [[ -e $dir/subdir_files ]]; then
        m subdir-link-r $dir/subdir_files
      fi
      local x=( $dir/!(binds|subdir_files|filesystem|machine_specific|..|.|.#*) )
@@ -185,7 +192,7 @@ case $user in
      for f in /etc/prometheus-{,export-}htpasswd; do
        if [[ -e $f ]]; then
          s chmod 640 $f
-        if getent passwd www-data; then
+        if getent passwd www-data &>/dev/null; then
            s chown root:www-data $f
          fi
        fi
@@ -194,15 +201,15 @@ case $user in
      if [[ -e $f ]]; then
        # note: this is duplicative of the file's own permissions
        s chmod 640 $f /etc/prometheus-pass
-      if getent passwd prometheus; then
+      if getent passwd prometheus &>/dev/null; then
          s chown root:prometheus $f
        fi
      fi
-
-
      ##### end special extra stuff #####
  
-    m sudo -H -u user2 "${BASH_SOURCE[0]}"
+    if ! $fast; then
+      m sudo -H -u user2 "${BASH_SOURCE[0]}"
+    fi
  
      f=/a/bin/distro-setup/system-status
      if [[ -x $f ]]; then
diff --git a/distro-begin b/distro-begin

index e9699408655ccb6aa4e6a52271937a2edb7b6e09..e2d1a8713dbef07c58956d681244aa565d86463b 100755 (executable)
--- a/distro-begin
+++ b/distro-begin
@@ -78,7 +78,10 @@ fi
  
  ### arg parsing
  recompile=false
-emacs=true
+emacs=false
+if [[ -e /a/opt/emacs ]]; then
+  emacs=true
+fi
  while [[ $1 == -* ]]; do
    case $1 in
      -r) recompile=true; shift ;;
@@ -97,7 +100,7 @@ source $script_dir/pkgs
  set +x
  source /a/bin/distro-functions/src/identify-distros
  $interactive || set -x
-for f in kd x2 x3 frodo tp li bk je demohost kw; do
+for f in kd x2 x3 frodo tp li bk je demohost kw sy bo; do
    eval "$f() { [[ $HOSTNAME == $f ]]; }"
  done
  codename=$(debian-codename)
@@ -233,6 +236,10 @@ sudo sed -i --follow-symlinks -f - /etc/hosts <<EOF
  /^127\.0\.1\.1/d
  EOF
  
+if bitfolk; then
+  sudo systemctl disable systemd-networkd
+fi
+
  ##### exit first stage if running as root
  if [[ $EUID == 0 ]]; then
    echo "$0: running as root. exiting now that users are setup"
@@ -248,6 +255,25 @@ lnf $x /root
  EOF
  done
  
+###### link files
+# convenient to just do all file linking in one place
+sudo /a/exe/lnf -T /a/bin /b
+sudo /a/exe/lnf -T /a/f /f
+sudo /a/exe/lnf -T /var/log/exim4 /el
+sudo /a/exe/lnf -T /a/f/ans /c
+sudo /a/exe/lnf -T /nocow/t /t
+if has_p; then
+  lnf -T /p/News ~/News
+fi
+dirs=(/q/root /q/root/.editor-backups /q/root/.undo-tree-history)
+sudo mkdir -p ${dirs[@]}
+sudo chmod 600 ${dirs[@]}
+sudo /a/exe/lnf /q/root/.editor-backups /q/root/.undo-tree-history \
+     /a/opt /a/c/.emacs.d $HOME/mw_vars /k/backup /root
+/a/bin/ds/install-my-scripts # needed for rootsshsync cronjob
+sudo /a/exe/lnf /a/c/.vim /a/c/.vimrc /a/c/.gvimrc /root
+
+
  ###### do conflink
  # vps needs bind group before conflink
  if vps; then
@@ -260,6 +286,8 @@ if [[ -e /etc/rootsudoenv ]]; then
    source /etc/rootsudoenv
  fi
  
+
+
  ###### bash environment setup
  set +x
  err-allow
@@ -428,26 +456,6 @@ case $(debian-codename-compat) in
  esac
  
  
-###### link files
-# convenient to just do all file linking in one place
-sudo /a/exe/lnf -T /a/bin /b
-sudo /a/exe/lnf -T /a/f /f
-sudo /a/exe/lnf -T /var/log/exim4 /el
-sudo /a/exe/lnf -T /a/f/ans /c
-sudo /a/exe/lnf -T /nocow/t /t
-if has_p; then
-  lnf -T /p/News ~/News
-fi
-dirs=(/q/root /q/root/.editor-backups /q/root/.undo-tree-history)
-sudo mkdir -p ${dirs[@]}
-sudo chmod 600 ${dirs[@]}
-sudo /a/exe/lnf /q/root/.editor-backups /q/root/.undo-tree-history \
-     /a/opt /a/c/.emacs.d $HOME/mw_vars /k/backup /root
-/a/bin/ds/install-my-scripts # needed for rootsshsync cronjob
-sudo /a/exe/lnf /a/c/.vim /a/c/.vimrc /a/c/.gvimrc /root
-
-
-
  
  #### arch specific early packages
  case $(distro-name) in
diff --git a/distro-end b/distro-end

index 407987ad8c34852078ce352dd803d12fa589250f..a5ba321b9812b4e72771880f232a11de6a2d215a 100755 (executable)
--- a/distro-end
+++ b/distro-end
@@ -523,7 +523,13 @@ Package: chromium-*
  Pin: release n=bionic
  Pin-Priority: 500
  EOF
-
+    ;;
+  nabia)
+    sd /etc/apt/preferences.d/chromium-bullseye <<EOF
+Package: chromium chromium-* libicu67 libjpeg62-turbo libjsoncpp24 libre2-9 libwebpmux3
+Pin: release o=Debian*,n=bullseye*
+Pin-Priority: 500
+EOF
      ;;
  esac
  
@@ -598,7 +604,22 @@ case $HOSTNAME in
      if [[ ! -e $f ]]; then
        dnsb8
      fi
+
+    pi prometheus-node-exporter
+
+    # ex for exporter
+    web-conf -p 9101 -f 9100 - apache2 ${HOSTNAME}ex.b8.nz <<'EOF'
+<Location "/">
+AuthType Basic
+AuthName "basic_auth"
+# created with
+# htpasswd -c prometheus-export-htpasswd USERNAME
+AuthUserFile "/etc/prometheus-export-htpasswd"
+Require valid-user
+</Location>
+EOF
      ;;&
+
    bk)
      sgo wg-quick@wgmail
  
@@ -1656,7 +1677,7 @@ m /a/bin/buildscripts/rust
  m /a/bin/buildscripts/misc
  m /a/bin/buildscripts/pithosfly
  #m /a/bin/buildscripts/alacritty
-m /a/bin/buildscripts/kitty
+#m /a/bin/buildscripts/kitty
  
  pi-nostart virtinst virt-manager
  soff libvirtd
@@ -1832,7 +1853,10 @@ sudo debconf-set-selections <<EOF
  grub-pc grub-pc/install_devices multiselect ${devs[*]}
  EOF
  
-# btrfs maintenance
+
+sysd-prom-fail-install dynamicipupdate
+sysd-prom-fail-install systemstatus
+sysd-prom-fail-install btrfsmaintstop
  sgo btrfsmaint.timer
  sgo btrfsmaintstop
  sgo systemstatus
@@ -1857,7 +1881,8 @@ case $HOSTNAME in
    kd)
      # ive got these + a needed dependency pinned to bullseye, just to get
      # versions more in line with the main docs.
-    pi prometheus-alertmanager prometheus prometheus-node-exporter
+    # Font awesome is needed for the alertmanager ui.
+    pi prometheus-alertmanager prometheus prometheus-node-exporter fonts-font-awesome
      web-conf -p 9091 -f 9090 - apache2 i.b8.nz <<'EOF'
  <Location "/">
  AuthType Basic
@@ -1868,6 +1893,20 @@ AuthUserFile "/etc/prometheus-htpasswd"
  Require valid-user
  </Location>
  EOF
+    # By default the alertmanager web ui is just a page suggesting the amtool cli;
+    # silencing is much faster in the real ui, so build it in the bullseye chroot and
+    # copy it here. NOTE(review): 1st chroot line runs prometheus-alertmanager itself - was an install intended? confirm
+    if [[ ! -e /usr/share/prometheus/alertmanager/ui/index.html ]]; then
+      sudo chroot /nocow/schroot/bullseye prometheus-alertmanager
+      sudo chroot /nocow/schroot/bullseye /usr/share/prometheus/alertmanager/generate-ui.sh
+      sudo rsync -avih /nocow/schroot/bullseye/usr/share/prometheus/alertmanager/ui/ /usr/share/prometheus/alertmanager/ui
+      ser restart prometheus-alertmanager
+    fi
+
+    for ser in prometheus-node-exporter prometheus-alertmanager prometheus; do
+      sysd-prom-fail-install $ser
+    done
+
      ;;
    *)
      pi prometheus-node-exporter
@@ -1881,21 +1920,10 @@ case $HOSTNAME in
    # either use iptables or, in
    # /etc/default/prometheus-node-exporter
    # listen on the wireguard interface
-  li|je|bk)
-    # ex for exporter
-    web-conf -p 9101 -f 9100 - apache2 ${HOSTNAME}ex.b8.nz <<'EOF'
-<Location "/">
-AuthType Basic
-AuthName "basic_auth"
-# created with
-# htpasswd -c prometheus-export-htpasswd USERNAME
-AuthUserFile "/etc/prometheus-export-htpasswd"
-Require valid-user
-</Location>
-EOF
-    ;;
    *)
      wgip=$(command sudo sed -rn 's,^ *Address *= *([^/]+).*,\1,p' /etc/wireguard/wghole.conf)
+    # old filename. remove once all hosts are updated.
+    s rm -fv /etc/apache2/sites-enabled/${HOSTNAME}wg.b8.nz.conf
      web-conf -i -a $wgip -p 9101 -f 9100 - apache2 ${HOSTNAME}wg.b8.nz <<'EOF'
  <Location "/">
  AuthType Basic
@@ -1906,7 +1934,25 @@ AuthUserFile "/etc/prometheus-export-htpasswd"
  Require valid-user
  </Location>
  EOF
-  ;;
+    # For work, i think we will just use the firewall for hosts in the main data center, and
+    # apache/nginx + tls + basic auth outside of it. or consider stunnel.
+
+
+    # TODO: figure out how to detect the ping failure and try again.
+
+    # Binding to the wg interface, it might go down, so always restart, and wait for it on boot.
+    s mkdir -p /etc/systemd/system/apache2.service.d # -p: the dir already exists when this is rerun
+    sd /etc/systemd/system/apache2.service.d/restart.conf <<EOF
+[Unit]
+After=wg-quick@wghole.service
+StartLimitIntervalSec=0
+
+[Service]
+Restart=always
+RestartSec=30
+EOF
+
+    ;;
  esac
  
  ### end prometheus ###
diff --git a/dynamic-ip-update b/dynamic-ip-update

index 1e7880274c3a57a6801d930a20e3a1ded1b27dc0..b7c60f520eb23904c7684414d735f7bd8b91e520 100755 (executable)
--- a/dynamic-ip-update
+++ b/dynamic-ip-update
@@ -1,8 +1,6 @@
  #!/bin/bash
  source ~/.bashrc
  
-
-
  main() {
  
    fqdn=$(hostname -f)
@@ -46,7 +44,12 @@ main() {
  
  
    if $athome; then
-    cur4="$(dig +short $dynhost @iankelling.org | tail -1)"
+    if ! cur4="$(dig +short $dynhost @iankelling.org | tail -1)"; then
+      if [[ ! $INVOCATION_ID ]]; then
+        echo "$0: dig failed. internet looks down. giving up"
+      fi
+      return 0
+    fi
      if ip4=$(curl -s4 https://iankelling.org/cgi/pubip); then
        if [[ $cur4 && $ip4 && $cur4 != $ip4 ]]; then
          up4=true # update ipv4
diff --git a/filesystem/etc/default/prometheus b/filesystem/etc/default/prometheus

index e733d592554eb0586fb29405e1c77be19f835b04..9ee91ab012907f20047332ee5daead948c2f216b 100644 (file)
--- a/filesystem/etc/default/prometheus
+++ b/filesystem/etc/default/prometheus
@@ -2,7 +2,7 @@
  
  # Set the command-line arguments to pass to the server.
  
-ARGS="--web.listen-address=127.0.0.1:9090 --web.external-url=https://i.b8.nz:9091"
+ARGS="--web.listen-address=127.0.0.1:9090 --web.external-url=https://i.b8.nz:9091 --log.level=info"
  
  
  
diff --git a/filesystem/etc/default/prometheus-alertmanager b/filesystem/etc/default/prometheus-alertmanager

index 4ff43f2bc7a3a73d3a973d38fd0fcc974dfbb73c..b77df7471e5c4df2e210120e5ced4a1e64c2bfef 100644 (file)
--- a/filesystem/etc/default/prometheus-alertmanager
+++ b/filesystem/etc/default/prometheus-alertmanager
@@ -2,8 +2,16 @@
  # default:
  #ARGS=""
  
-# iank:
-ARGS="--web.listen-address=127.0.0.1:9093"
+# from its README: If running Alertmanager in high availability mode is not
+#  desired, setting --cluster.listen-address= prevents Alertmanager from
+#  listening to incoming peer requests.
+# Why the fuck is that in the readme, not the docs below?
+# If you don't add that, it will fail to start on boot because
+# it doesn't wait for network.target, and gives this error message:
+#  component=cluster err="couldn't deduce an advertise address: no private IP found, explicit advertise addr not provided"
+
+
+ARGS="--cluster.listen-address= --web.listen-address=127.0.0.1:9093"
  
  # this file is from version 0.21
  
diff --git a/filesystem/etc/prometheus/alertmanager_templates/iank.tmpl b/filesystem/etc/prometheus/alertmanager_templates/iank.tmpl

new file mode 100644 (file)

index 0000000..69defe6
--- /dev/null
+++ b/filesystem/etc/prometheus/alertmanager_templates/iank.tmpl
@@ -0,0 +1,10 @@
+{{/* Lists firing alerts, then resolved alerts; each section is emitted only if non-empty. */}}{{ define "iank.default.description" }}
+{{ if gt (len .Alerts.Firing) 0 -}}
+Alerts Firing:
+{{ template "__text_alert_list" .Alerts.Firing }}
+{{- end }}
+{{ if gt (len .Alerts.Resolved) 0 -}}
+Alerts Resolved:
+{{ template "__text_alert_list" .Alerts.Resolved }}
+{{- end }}
+{{- end }}
diff --git a/filesystem/etc/prometheus/file_sd/node.yml b/filesystem/etc/prometheus/file_sd/node.yml

index e58d520af1d30823de94f403590b7ec9d632b46d..61c5184d87e3b57b2f3854c99fc4f97c0721efb4 100644 (file)
--- a/filesystem/etc/prometheus/file_sd/node.yml
+++ b/filesystem/etc/prometheus/file_sd/node.yml
@@ -1,10 +1,7 @@
  - targets:
    - kdwg:9101
    - sywg:9101
-  # - bk:9101
-  # - je:9101
-  # - li:9101
    # - frodo:9101
-  # - kwwg:9101
-  # - x3wg:9101
-  - x2wg:9101
+  - kwwg:9101
+  - x3wg:9101
+#  - x2wg:9101
diff --git a/filesystem/etc/prometheus/file_sd/tlsnode.yml b/filesystem/etc/prometheus/file_sd/tlsnode.yml

index 47f8c7c842a18b3ae121809dbd0a239f4771d67a..4352719013e7c1b6e37dd9048b4ed6a6614e4933 100644 (file)
--- a/filesystem/etc/prometheus/file_sd/tlsnode.yml
+++ b/filesystem/etc/prometheus/file_sd/tlsnode.yml
@@ -1,4 +1,4 @@
  - targets:
-  # - bk:9101
-  # - je:9101
-  # - li:9101
+   - bkex.b8.nz:9101
+   - jeex.b8.nz:9101
+   - liex.b8.nz:9101
diff --git a/filesystem/etc/prometheus/rules/iank.yml b/filesystem/etc/prometheus/rules/iank.yml

index 4439c41d41cb61c6a59a4bbb5b967fbf0aeebbb3..048edafa5f801b26b858e1c9bfd0016ee92a3a78 100644 (file)
--- a/filesystem/etc/prometheus/rules/iank.yml
+++ b/filesystem/etc/prometheus/rules/iank.yml
@@ -8,34 +8,129 @@
  groups:
  - name: standard
    rules:
-  - alert: mailtest-check
+
+## uncomment for testing an alert firing
+#   - alert: test-alert4
+#     expr: vector(1)
+# #    expr: nonexistent_metric
+#     for: 0m
+#     labels:
+#       severity: day
+#     annotations:
+#       description: "always-firing alert VALUE = {{ $value }}"
+
+
+
+###### BEGIN MISC NOTES ######
+
+#
+# other interesting exporters
+# https://github.com/prometheus-community/node-exporter-textfile-collector-scripts
+#
+
+# interesting post: https://www.metricfire.com/blog/top-5-prometheus-alertmanager-gotchas/
+
+# interesting promql query that could be useful later.
+# changes(ALERTS_FOR_STATE[24h])
+#
+#
+#
+# alert flap strategy.
+# https://roidelapluie.be/blog/2019/02/21/prometheus-last/
+#
+# Another general idea is to make an alert that fires for 24 hours and
+# inhibits another alert for the same thing, for cases where we want at
+# most 1 alert per 24 hours.
+
+###### END MISC NOTES ######
+
+
+
+
+# alerting on missing metrics:
+# https://www.robustperception.io/absent-alerting-for-scraped-metrics
+# that doesn't work if we want to alert across multiple hosts, e.g.
+# up{job="node"} == 1 unless node_systemd_unit_state{name="systemstatus.service",state="active",job="node"}
+# however, google led me to a solution here
+# https://www.linkedin.com/pulse/prometheus-alert-missing-metrics-labels-nirav-shah
+# there is also the absent() function, but I didn't see a way to make that work
+  - alert: mysers_units_missing
+    expr: |-
+      count(up{job="node"}) by (instance) * 3 unless count(node_systemd_unit_state{name=~"(systemstatus|btrfsmaintstop|dynamicipupdate).service",state="active"}) by (instance)
+    for: 20m
+    labels:
+      severity: warn
+
+  - alert: mysers_not_active
+    expr: |-
+      node_systemd_unit_state{name=~"(systemstatus|btrfsmaintstop|dynamicipupdate).service",state="active"} != 1
+    for: 20m
+    labels:
+      severity: warn
+
+  - alert: sysd_result_fail
+    expr: |-
+      rate(node_systemd_unit_result_fail_count[30m]) > 0
+    labels:
+      severity: day
+
+
+  - alert: mailtest_check
      expr: |-
        time() - mailtest_check_last_usec > 60 * 12
      labels:
        severity: day
      annotations:
-      description: '{{ $labels.instance }} mailtest-check'
-      summary: '{{ $labels.instance }} mailtest-check'
+      summary: '12 minutes down'
  
    # 42 mins: enough for a 30 min queue run plus 12
-  - alert: mailtest-check
+  - alert: mailtest_check
      expr: |-
        time() - mailtest_check_last_usec > 60 * 42
      labels:
        severity: prod
      annotations:
-      description: '{{ $labels.instance }} mailtest-check'
-      summary: '{{ $labels.instance }} mailtest-check'
+      summary: '43 minutes down'
  
    - alert: 1pmtest
-    expr: hour() == 18 and minute() < 5
+    expr: hour() == 17 and minute() < 5
      for: 0m
      labels:
        severity: daytest
      annotations:
-      summary: Prometheus daily test alert (instance {{ $labels.instance }})
-      description: "Prometheus daily test alert if no other alerts. It
-    is an end to end test.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      summary: Prometheus daily test alert
+
+
+
+# alternate expression, to calculate if the alert would have fired is:
+#  min_over_time(sum_over_time(up[30m])[1d:]) == 0
+#  where 30m matches the for: time in target_down
+#
+# sum_over_time is not needed, just convenience for graphing
+  - alert: target_down_inhibitor
+    expr: |-
+      sum_over_time(ALERTS{alertname="target_down"}[1d])
+    labels:
+      severity: ignore
+    annotations:
+      summary: alert that indicates target_down alert fired in the last day
+      description: "VALUE = {{ $value }}"
+
+# For targets where we only alert on longer downtimes, we still want
+# to know if a target is going down many times for short periods over
+# a long period of time. But ignore reboots.
+#
+## Another way would be to detect an overall downtime:
+# avg_over_time(node_systemd_unit_state{name="dynamicipupdate.service",state="active"}[1d]) < .95
+  - alert: up_resets
+    expr: |-
+      resets(up[3d]) - changes(node_boot_time_seconds[3d]) > 15
+    labels:
+      severity: warn
+    annotations:
+      summary: "Target has gone down {{ $value }} times in 3 days, > 15"
+
+
  
  
  # https://awesome-prometheus-alerts.grep.to/rules
@@ -56,27 +151,30 @@ groups:
        severity: day
      annotations:
        summary: Prometheus job missing (instance {{ $labels.instance }})
-      description: "A Prometheus job has disappeared\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "A Prometheus job has disappeared\n  VALUE = {{ $value }}"
  
-  - alert: PrometheusTargetMissing
+# TODO: for some hosts, notably li and MAIL_HOST, we want to alert sooner
+# than 30m and set severity to day. The mail host is tricky since it roams,
+# but I think the right way to do it is to check for absence of this metric:
+# mailtest_check_last_usec{folder="/m/md/l/testignore",from="ian@iankelling.org"}
+  - alert: target_down
      expr: up == 0
      for: 30m
      labels:
        severity: warn
      annotations:
-      summary: Prometheus target missing (instance {{ $labels.instance }})
-      description: "A Prometheus target has disappeared. An exporter might be crashed.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      summary: Target down for 30m
  
-      # todo: this should supress the above alert
-  # - alert: PrometheusAllTargetsMissing
-  #   expr: count by (job) (up) == 0
-  #   for: 30m
-  #   labels:
-  #     severity: day
-  #     alert-group: local-prom
-  #   annotations:
-  #     summary: Prometheus all targets missing (instance {{ $labels.instance }})
-  #     description: "A Prometheus job does not have living target anymore.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+
+    # todo: this should group with the above alert
+  - alert: PrometheusAllTargetsMissing
+    expr: count by (job) (up) == 0
+    for: 10m
+    labels:
+      severity: day
+#      alert-group: local-prom
+    annotations:
+      description: "A Prometheus job does not have living target anymore.\n  VALUE = {{ $value }}"
  
    - alert: PrometheusConfigurationReloadFailure
      expr: prometheus_config_last_reload_successful != 1
@@ -84,20 +182,15 @@ groups:
      labels:
        severity: day
      annotations:
-      summary: Prometheus configuration reload failure (instance {{ $labels.instance }})
-      description: "Prometheus configuration reload error\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
-
-      # I have an out of band alert to make sure prometheus is up. this
-      # looks like it would generate false positives. todo: think
-      # through what a valid crash loop detection would look like.
-  # - alert: PrometheusTooManyRestarts
-  #   expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 10
-  #   for: 0m
-  #   labels:
-  #     severity: warning
-  #   annotations:
-  #     summary: Prometheus too many restarts (instance {{ $labels.instance }})
-  #     description: "Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus configuration reload error\n  VALUE = {{ $value }}"
+
+  - alert: PrometheusTooManyRestarts
+    expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[30m]) > 10
+    for: 0m
+    labels:
+      severity: warning
+    annotations:
+      description: "Prometheus has restarted more than ten times in the last 30 minutes. It might be crashlooping.\n  VALUE = {{ $value }}"
  
    - alert: PrometheusAlertmanagerJobMissing
      expr: absent(up{job="alertmanager"})
@@ -105,8 +198,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus AlertManager job missing (instance {{ $labels.instance }})
-      description: "A Prometheus AlertManager job has disappeared\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "A Prometheus AlertManager job has disappeared\n  VALUE = {{ $value }}"
  
    - alert: PrometheusAlertmanagerConfigurationReloadFailure
      expr: alertmanager_config_last_reload_successful != 1
@@ -114,8 +206,7 @@ groups:
      labels:
        severity: day
      annotations:
-      summary: Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }})
-      description: "AlertManager configuration reload error\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "AlertManager configuration reload error\n  VALUE = {{ $value }}"
  
    - alert: PrometheusNotConnectedToAlertmanager
      expr: prometheus_notifications_alertmanagers_discovered < 1
@@ -123,8 +214,7 @@ groups:
      labels:
        severity: day
      annotations:
-      summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }})
-      description: "Prometheus cannot connect the alertmanager\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus cannot connect the alertmanager\n  VALUE = {{ $value }}"
  
    - alert: PrometheusRuleEvaluationFailures
      expr: increase(prometheus_rule_evaluation_failures_total[3m]) > 0
@@ -132,8 +222,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus rule evaluation failures (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTemplateTextExpansionFailures
      expr: increase(prometheus_template_text_expansion_failures_total[3m]) > 0
@@ -141,8 +230,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus template text expansion failures (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} template text expansion failures\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} template text expansion failures\n  VALUE = {{ $value }}"
  
    - alert: PrometheusRuleEvaluationSlow
      expr: prometheus_rule_group_last_duration_seconds > prometheus_rule_group_interval_seconds
@@ -150,8 +238,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus rule evaluation slow (instance {{ $labels.instance }})
-      description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n  VALUE = {{ $value }}"
  
    - alert: PrometheusNotificationsBacklog
      expr: min_over_time(prometheus_notifications_queue_length[30m]) > 0
@@ -159,8 +246,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus notifications backlog (instance {{ $labels.instance }})
-      description: "The Prometheus notification queue has not been empty for 10 minutes\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "The Prometheus notification queue has not been empty for 10 minutes\n  VALUE = {{ $value }}"
  
    - alert: PrometheusAlertmanagerNotificationFailing
      expr: rate(alertmanager_notifications_failed_total[1m]) > 0
@@ -168,8 +254,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus AlertManager notification failing (instance {{ $labels.instance }})
-      description: "Alertmanager is failing sending notifications\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Alertmanager is failing sending notifications\n  VALUE = {{ $value }}"
  
    # file_sd doesnt count as service discovery, so 0 is expected.
    # - alert: PrometheusTargetEmpty
@@ -178,8 +263,7 @@ groups:
    #   labels:
    #     severity: day
    #   annotations:
-  #     summary: Prometheus target empty (instance {{ $labels.instance }})
-  #     description: "Prometheus has no target in service discovery\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+  #     description: "Prometheus has no target in service discovery\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTargetScrapingSlow
      expr: prometheus_target_interval_length_seconds{quantile="0.9"} > 90
@@ -187,8 +271,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus target scraping slow (instance {{ $labels.instance }})
-      description: "Prometheus is scraping exporters slowly\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus is scraping exporters slowly\n  VALUE = {{ $value }}"
  
    - alert: PrometheusLargeScrape
      expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total[10m]) > 10
@@ -196,8 +279,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus large scrape (instance {{ $labels.instance }})
-      description: "Prometheus has many scrapes that exceed the sample limit\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus has many scrapes that exceed the sample limit\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTargetScrapeDuplicate
      expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total[5m]) > 0
@@ -205,8 +287,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus target scrape duplicate (instance {{ $labels.instance }})
-      description: "Prometheus has many samples rejected due to duplicate timestamps but different values\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus has many samples rejected due to duplicate timestamps but different values\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTsdbCheckpointCreationFailures
      expr: increase(prometheus_tsdb_checkpoint_creations_failed_total[1m]) > 0
@@ -214,8 +295,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus TSDB checkpoint creation failures (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} checkpoint creation failures\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} checkpoint creation failures\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTsdbCheckpointDeletionFailures
      expr: increase(prometheus_tsdb_checkpoint_deletions_failed_total[1m]) > 0
@@ -223,8 +303,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus TSDB checkpoint deletion failures (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} checkpoint deletion failures\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} checkpoint deletion failures\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTsdbCompactionsFailed
      expr: increase(prometheus_tsdb_compactions_failed_total[1m]) > 0
@@ -232,8 +311,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus TSDB compactions failed (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} TSDB compactions failures\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} TSDB compactions failures\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTsdbHeadTruncationsFailed
      expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) > 0
@@ -241,8 +319,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus TSDB head truncations failed (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} TSDB head truncation failures\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} TSDB head truncation failures\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTsdbReloadFailures
      expr: increase(prometheus_tsdb_reloads_failures_total[1m]) > 0
@@ -250,8 +327,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus TSDB reload failures (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} TSDB reload failures\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} TSDB reload failures\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTsdbWalCorruptions
      expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0
@@ -259,8 +335,7 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus TSDB WAL corruptions (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n  VALUE = {{ $value }}"
  
    - alert: PrometheusTsdbWalTruncationsFailed
      expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) > 0
@@ -268,5 +343,4 @@ groups:
      labels:
        severity: warn
      annotations:
-      summary: Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }})
-      description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+      description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures\n  VALUE = {{ $value }}"
diff --git a/filesystem/etc/systemd/system/btrfsmaintstop.service b/filesystem/etc/systemd/system/btrfsmaintstop.service

index 5e8160dc48eeee97a11792814684c6f7609e62a6..5ddec1708c2c2fdec224ca7136830bc5b19985e5 100644 (file)
--- a/filesystem/etc/systemd/system/btrfsmaintstop.service
+++ b/filesystem/etc/systemd/system/btrfsmaintstop.service
@@ -5,9 +5,9 @@ StartLimitIntervalSec=0
  
  [Service]
  Type=simple
-ExecStart=/usr/local/bin/sysd-mail-once -10 btrfsmaintstop /usr/local/bin/btrfsmaint check
+ExecStart=/usr/local/bin/btrfsmaint check
  Restart=always
  RestartSec=600
  
  [Install]
-WantedBy=grahical.target
+WantedBy=graphical.target
diff --git a/filesystem/etc/systemd/system/dynamicipupdate.service b/filesystem/etc/systemd/system/dynamicipupdate.service

index 54b04f9222630dc0cc95cfbffc56a5fc4229e8be..302f7f4ee53b90717df388bd3318c52c5f224c10 100644 (file)
--- a/filesystem/etc/systemd/system/dynamicipupdate.service
+++ b/filesystem/etc/systemd/system/dynamicipupdate.service
@@ -5,9 +5,9 @@ StartLimitIntervalSec=0
  
  [Service]
  Type=simple
-ExecStart=/usr/local/bin/sysd-mail-once -40 dynamicipupdate /usr/local/bin/dynamic-ip-update
+ExecStart=/usr/local/bin/dynamic-ip-update
  Restart=always
  RestartSec=600
  
  [Install]
-WantedBy=grahical.target
+WantedBy=graphical.target
diff --git a/filesystem/etc/systemd/system/epanicclean.service b/filesystem/etc/systemd/system/epanicclean.service

index bc79520b1eb999a8034d7699f1f76db2771b7387..5a0167e81b6966806f4963eeca09b5badb12116f 100644 (file)
--- a/filesystem/etc/systemd/system/epanicclean.service
+++ b/filesystem/etc/systemd/system/epanicclean.service
@@ -10,4 +10,4 @@ Restart=always
  RestartSec=600
  
  [Install]
-WantedBy=grahical.target
+WantedBy=graphical.target
diff --git a/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf b/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf

new file mode 100644 (file)

index 0000000..403672e
--- /dev/null
+++ b/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf
@@ -0,0 +1,11 @@
+# apparently alertmanager fails to start when the internet is down:
+# Mar 10 13:20:09 kd prometheus-alertmanager[2719]: level=error ts=2022-03-10T18:20:09.907Z caller=main.go:243 msg="unable to initialize gossip mesh" err="create memberlist: Failed to get final advertise address: No private IP address found, and explicit IP not provided"
+
+[Unit]
+# needed to continually restart
+StartLimitIntervalSec=0
+
+[Service]
+Restart=always
+# time to sleep before restarting a service
+RestartSec=300
diff --git a/filesystem/etc/systemd/system/prometheus.d/restart.conf b/filesystem/etc/systemd/system/prometheus.d/restart.conf

new file mode 100644 (file)

index 0000000..aa2ea84
--- /dev/null
+++ b/filesystem/etc/systemd/system/prometheus.d/restart.conf
@@ -0,0 +1,8 @@
+[Unit]
+# needed to continually restart
+StartLimitIntervalSec=0
+
+[Service]
+Restart=always
+# time to sleep before restarting a service
+RestartSec=600
diff --git a/filesystem/etc/systemd/system/systemstatus.service b/filesystem/etc/systemd/system/systemstatus.service

index e21a4a330b5639cf7d79f16ec4a950448438ed2d..eb216619369673a3a43324513e3a85ffd58cc701 100644 (file)
--- a/filesystem/etc/systemd/system/systemstatus.service
+++ b/filesystem/etc/systemd/system/systemstatus.service
@@ -6,7 +6,7 @@ After=local-fs.target
  [Service]
  Type=simple
  Environment=XDG_RUNTIME_DIR=/run/user/1000
-ExecStart=/usr/local/bin/sysd-mail-once -3 systemstatus /usr/local/bin/system-status
+ExecStart=/usr/local/bin/system-status
  IOSchedulingClass=idle
  CPUSchedulingPolicy=idle
  User=iank
@@ -16,4 +16,4 @@ RestartSec=600
  
  
  [Install]
-WantedBy=grahical.target
+WantedBy=graphical.target
diff --git a/filesystem/usr/local/bin/myterm b/filesystem/usr/local/bin/myterm

new file mode 100755 (executable)

index 0000000..0ccb39c
--- /dev/null
+++ b/filesystem/usr/local/bin/myterm
@@ -0,0 +1,14 @@
+#!/bin/sh
+# using bin/sh for speed since the script is very simpl.e
+
+# these system76 systems have garbled display with konsole
+# and some other apps like mumble. something about the intel
+# graphics i think.
+case $HOSTNAME in
+  sy|bo)
+    exec sakura "$@"
+    ;;
+  *)
+    exec konsole "$@"
+    ;;
+esac
diff --git a/filesystem/usr/local/bin/myupgrade b/filesystem/usr/local/bin/myupgrade

index c206f8f7888127295a9e08864d616360122d2b12..fb8d1d05643c05b9ffb87a13d96024a1de74ce14 100755 (executable)
--- a/filesystem/usr/local/bin/myupgrade
+++ b/filesystem/usr/local/bin/myupgrade
@@ -25,7 +25,7 @@ d() {
    if [[ $DEBUG ]]; then
      pee cat "wall -n"
    else
-    echo ok | sed 's/^/myupgrade /' | pee logger "wall -n"
+    sed 's/^/myupgrade /' | pee logger "wall -n"
    fi
  }
  myreboot() {
@@ -52,8 +52,9 @@ myreboot() {
  # We should figure some workaround.
  
  if ! out=$(/sbin/needrestart -p 2>&1); then
-  printf "%s\n\n" "$out"
-  if [[ $HOSTNAME != "$MAIL_HOST" ]]; then
+  if [[ $HOSTNAME == "$MAIL_HOST" ]]; then
+    needrestart -r l
+  else
      myreboot
    fi
  fi
diff --git a/i3-sway/common.conf b/i3-sway/common.conf

index 0e49aec505d9af2f7b2d585f6d61c5d50af56c4b..29a25c4ab277b4b70aa5749c98f71e7157b2d6bb 100644 (file)
--- a/i3-sway/common.conf
+++ b/i3-sway/common.conf
@@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10
  bindsym $mod+Shift+m border toggle
  
  bindsym $mod+j exec emacsclient -c
-bindsym $mod+k exec kitty
+bindsym $mod+k exec myterm
  bindsym $mod+l exec dmenu_run
  # note default is 27% on my system76. not sure if these
  # keybinds will screw up other laptop brightness keys.
diff --git a/install-my-scripts b/install-my-scripts

index cb5435057993d96d92ca3e11576138e727a88b39..c6f960af8e029cc9b4d078f3f36e6af29fbff0a2 100755 (executable)
--- a/install-my-scripts
+++ b/install-my-scripts
@@ -38,7 +38,6 @@ x="$(readlink -f -- "${BASH_SOURCE[0]}")"; cd ${x%/*} # directory of this file
  rsync -t --chmod=755 --chown=root:root switch-mail-host btrbk-run mount-latest-subvol \
        check-subvol-stale myi3status mailtest-check \
        mailbindwatchdog \
-      /a/bin/log-quiet/sysd-mail-once \
        check-mailq \
        unsaved-buffers.el \
        mail-backup-clean \
diff --git a/filesystem/etc/default/prometheus-node-exporter b/machine_specific/kd/filesystem/etc/default/prometheus-node-exporter

similarity index 100%

rename from filesystem/etc/default/prometheus-node-exporter

rename to machine_specific/kd/filesystem/etc/default/prometheus-node-exporter
diff --git a/mail-setup b/mail-setup

index 71b086b72b85f4621a0fa10686b9b866f1ce43b2..0a41df0b195180a7a07d41121394a8db6abeaf1d 100755 (executable)
--- a/mail-setup
+++ b/mail-setup
@@ -660,6 +660,7 @@ fi
  
  case $HOSTNAME in
    $MAIL_HOST)
+    # todo, should this be after vpn service
      i /etc/systemd/system/unbound.service.d/nn.conf <<EOF
  [Unit]
  After=mailnn.service
@@ -1053,6 +1054,16 @@ expertpathologyreview.com * F,1d,10m;F,14d,1h
  je.b8.nz * F,1d,10m;F,14d,1h
  zroe.org * F,1d,10m;F,14d,1h
  eximbackup.b8.nz * F,1d,4m;F,14d,1h
+
+# The spec says the target domain will be used for temporary host errors,
+# but i've found that isn't correct, the hostname is required
+# at least sometimes.
+nn.b8.nz * F,1d,4m;F,14d,1h
+defaultnn.b8.nz * F,1d,4m;F,14d,1h
+mx.iankelling.org * F,1d,4m;F,14d,1h
+bk.b8.nz * F,1d,4m;F,14d,1h
+eggs.gnu.org * F,1d,4m;F,14d,1h
+mail.fsf.org * F,1d,15m;F,14d,1h
  EOF
  
  
@@ -1521,7 +1532,7 @@ case $HOSTNAME in
      # sieve has the benefit of being supported in postfix and
      # proprietary/weird environments, so there is more examples on the
      # internet.
-    pi dovecot-core dovecot-imapd dovecot-sieve dovecot-lmtpd dovecot-sqlite sqlite3
+    pi-nostart dovecot-core dovecot-imapd dovecot-sieve dovecot-lmtpd dovecot-sqlite sqlite3
  
      for f in /p/c{/machine_specific/$HOSTNAME,}/filesystem/etc/dovecot/users; do
        if [[ -e $f ]]; then
@@ -1866,6 +1877,7 @@ if [[ $HOSTNAME == bk ]]; then
    ### end composer install
  
    rcdirs=(/usr/local/lib/rcexpertpath /usr/local/lib/rcninja)
+  ncdirs=(/var/www/ncninja)
    ncdirs=(/var/www/ncexpertpath /var/www/ncninja)
    # point debian cronjob to our local install, preventing daily cron error
  
@@ -2985,8 +2997,8 @@ elif [[ $uid != 608 ]]; then
    m usermod -u 608 Debian-exim
    m groupmod -g 608 Debian-exim
    m usermod -g 608 Debian-exim
-  m find / /nocow -path ./var/tmp -prune -o -xdev -uid $uid -execdir chown -h 608 {} +
-  m find / /nocow -path ./var/tmp -prune -o -xdev -gid $gid -execdir chgrp -h 608 {} +
+  m find / /nocow -xdev -path ./var/tmp -prune -o -uid $uid -execdir chown -h 608 {} +
+  m find / /nocow -xdev -path ./var/tmp -prune -o -gid $gid -execdir chgrp -h 608 {} +
  fi
  
  # * start / stop services
@@ -3111,7 +3123,6 @@ MAILTO=daylerts@iankelling.org
  */5  * * * *   root timeout 290 mailtest-check slow |& log-once -4 mailtest-check
  # if a bounce happened yesterday, dont let it slip through the cracks
  8   1 * * *   root export MAILTO=alerts@iankelling.org; awk '\$5 == "**"' /var/log/exim4/mainlog.1
-0   13 * * *  root echo "If the 1pm doesnt happen, you are in the matrix. Wake up."
  EOF
      m sudo rsync -ahhi --chown=root:root --chmod=0755 \
        /b/ds/mailtest-check /b/ds/check-remote-mailqs /usr/local/bin/
@@ -3121,6 +3132,7 @@ EOF
      test_to="testignore@expertpathologyreview.com, testignore@je.b8.nz, testignore@amnimal.ninja, jtuttle@gnu.org"
  
      cat >>/etc/cron.d/mailtest <<EOF
+0   13 * * *  root echo "1pm alert. You are not in the matrix."
  2   * * * *   root check-remote-mailqs |& log-once check-remote-mailqs
  EOF
      ;;&
diff --git a/mount-latest-subvol b/mount-latest-subvol

index 894ece520546cf9b099072ba00327df8b12ea5b0..d6af39259c7460ab24cd35f78caaf6cac54d07aa 100644 (file)
--- a/mount-latest-subvol
+++ b/mount-latest-subvol
@@ -440,7 +440,7 @@ done
  
  
  
-for dir in /mnt/r7/amy/{root,boot}_ubuntubionic /mnt/{root2/root,boot2/boot}_ubuntubionic; do
+for dir in /mnt/r7/amy/{root/root,boot/boot}_ubuntubionic /mnt/{root2/root,boot2/boot}_ubuntubionic; do
    vol=${dir##*/}
    root_dir=${dir%/*}
    if [[ ! -d $root_dir ]]; then
diff --git a/pkgs b/pkgs

index 029e22835e37bbdbee6b5d24f469bbb719e93e31..df991760f378c3f00be7f11114cf75709bbacd9d 100644 (file)
--- a/pkgs
+++ b/pkgs
@@ -7,13 +7,15 @@ p1=(
    cryptsetup
    lvm2
    mbuffer
+  moreutils
    screen
  )
  p2=(
    bash-completion
    curl
-  ethtool
    eatmydata
+  etckeeper
+  ethtool
    fping
    git
    haveged
@@ -99,7 +101,6 @@ p3=(
    python3-dnspython
    duplicity
    elinks
-  etckeeper
    evince
    exim4-doc-html
    exfat-fuse
@@ -173,7 +174,7 @@ p3=(
    memtester
    metastore
    mhonarc
-  moreutils
+  mmdebstrap
    mps-youtube
    mpv
    mumble
@@ -185,6 +186,7 @@ p3=(
    ncftp
    nginx-doc
    nmap
+  nyancat
    obs-studio
    offlineimap
    oathtool
@@ -236,6 +238,7 @@ p3=(
    tmate
    transmission-remote-gtk
    trash-cli
+  tty-clock
    vlc
    wamerican-huge
    wireless-tools
diff --git a/primary-setup b/primary-setup

index 7699b17548f4a18af89def047744c399892c1f66..01ddd536f7ea7f424fbfbbe2839d3f91f710f30b 100755 (executable)
--- a/primary-setup
+++ b/primary-setup
@@ -34,6 +34,11 @@ EOF
  fi
  source /a/bin/bash_unpublished/source-state
  
+# fixing up a bad state that servers got in.
+if [[ -e /dev/shm/iank-status ]]; then
+  chown iank.iank /dev/shm/iank-status
+fi
+
  
  if dpkg -s rss2email &>/dev/null; then
    if [[ $HOSTNAME == "$MAIL_HOST" ]]; then
diff --git a/rootsshsync b/rootsshsync

index fa36a563898ea2ca6b100acb4f0fc85cb705e660..900a6628aa3979784844433c30efabb3a0807f45 100755 (executable)
--- a/rootsshsync
+++ b/rootsshsync
@@ -59,7 +59,7 @@ if [[ -e $user_ssh_dir/config ]]; then
  fi
  chown -R root:root /root/.ssh
  
-rsync -t --chmod=755 --chown=root:root /b/ds/hssh /usr/local/bin
+rsync -t --chmod=755 --chown=root:root /a/bin/ds/hssh /usr/local/bin
  
  if [[ -e /a/opt/btrbk/ssh_filter_btrbk.sh ]]; then
    install /a/opt/btrbk/ssh_filter_btrbk.sh /usr/local/bin
diff --git a/subdir_files/.config/i3/config b/subdir_files/.config/i3/config

index b1260307998074b2235b3e7419ba1c453b3430c3..01b5c2c99ea4bb1c78b3a55585b658f500c71283 100644 (file)
--- a/subdir_files/.config/i3/config
+++ b/subdir_files/.config/i3/config
@@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10
  bindsym $mod+Shift+m border toggle
  
  bindsym $mod+j exec emacsclient -c
-bindsym $mod+k exec kitty
+bindsym $mod+k exec myterm
  bindsym $mod+l exec dmenu_run
  # note default is 27% on my system76. not sure if these
  # keybinds will screw up other laptop brightness keys.
diff --git a/subdir_files/.config/sakura/sakura.conf b/subdir_files/.config/sakura/sakura.conf

index 3081b59761c58f99d3c3c3a90eea60df810182b0..8854f2260299cb668dae238e5ebebcd05ce52827 100644 (file)
--- a/subdir_files/.config/sakura/sakura.conf
+++ b/subdir_files/.config/sakura/sakura.conf
@@ -65,3 +65,5 @@ icon_file=terminal-tango.svg
  use_fading=false
  scrollable_tabs=true
  word_chars=-,./?%&#_~:
+search_accelerator=5
+search_key=F
diff --git a/subdir_files/.config/sway/config b/subdir_files/.config/sway/config

index f96355ab88aea2cb7596386343e1dadf881163a1..ba675c220713fb0263c92f41e0cb38e631adc7ec 100644 (file)
--- a/subdir_files/.config/sway/config
+++ b/subdir_files/.config/sway/config
@@ -81,7 +81,7 @@ bindsym $mod+9 workspace 10
  bindsym $mod+Shift+m border toggle
  
  bindsym $mod+j exec emacsclient -c
-bindsym $mod+k exec kitty
+bindsym $mod+k exec myterm
  bindsym $mod+l exec dmenu_run
  # note default is 27% on my system76. not sure if these
  # keybinds will screw up other laptop brightness keys.
diff --git a/system-status b/system-status

index 07c730d71a7338db5a9343d359a857829a481cad..f50d238b7d6523acc14c65765503894a52811916 100755 (executable)
--- a/system-status
+++ b/system-status
@@ -30,27 +30,38 @@ v() {
  }
  # log-once COUNT NAME [MESSAGE]
  lo() {
-  /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost
+  if type -p ifne &>/dev/null; then
+    /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" root@localhost
+  fi
  }
  
  loday() {
-  /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylerts@iankelling.org
+  if type -p ifne &>/dev/null; then
+    /usr/local/bin/log-once "$@" | ifne mail -s "$HOSTNAME: system-status $2" daylert@iankelling.org
+  fi
  }
  
  # todo, consider migrating some of these alerts into prometheus
  write-status() {
    chars=("${first_chars[@]}")
  
-  services=(
-    epanicclean
-    systemstatus
-    btrfsmaintstop
-    dynamicipupdate
-  )
+
+  services=( epanicclean )
+  case $HOSTNAME in
+    bk|je|li) : ;;
+    *)
+      services+=(
+        systemstatus
+        btrfsmaintstop
+        dynamicipupdate
+      )
+      ;;
+  esac
+
    bads=()
-  if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then
+  if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then
      for s in ${services[@]}; do
-      if [[ $(systemctl show -p SubState --value $s) != running ]]; then
+      if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then
          bads+=($s)
        fi
      done
@@ -67,9 +78,9 @@ write-status() {
    case $HOSTNAME in
      kd)
        bads=()
-      if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$'; then
+      if systemctl show -p SubState --value ${services[@]} | egrep -v '^(running|)$' &>/dev/null; then
          for s in ${services[@]}; do
-          if [[ $(systemctl show -p SubState --value $s) != running ]]; then
+          if [[ $(systemctl show -p SubState --value $s 2>&1) != running ]]; then
              bads+=($s)
            fi
          done
@@ -143,7 +154,7 @@ write-status() {
      chars+=(A)
    fi
  
-  glob=(/m/md/daylerts/{new,cur}/!(*,S))
+  glob=(/m/md/daylert/{new,cur}/!(*,S))
    if [[ -e ${glob[0]} ]]; then
      chars+=(DAY)
    fi
@@ -175,12 +186,13 @@ write-status() {
    esac
  
    begin=false
-  if ! make -C /b/ds -q ~/.local/distro-begin || [[ $(<~/.local/distro-begin) != 0 ]]; then
+
+  if ! make -C /b/ds -q ~/.local/distro-begin 2>/dev/null || [[ $(<~/.local/distro-begin) != 0 ]]; then
      begin=true
    fi
  
    end=false
-  if ! make -C /b/ds -q ~/.local/distro-end || [[ $(<~/.local/distro-end) != 0 ]]; then
+  if ! make -C /b/ds -q ~/.local/distro-end 2>/dev/null || [[ $(<~/.local/distro-end) != 0 ]]; then
      end=true
    fi
  
@@ -261,7 +273,10 @@ write-status() {
      # leave it up to epanic-clean to send email notification
    fi
  
-  source /a/bin/bash_unpublished/source-state
+  if [[ -e /a/bin/bash_unpublished/source-state ]]; then
+    # /a gets remounted due to btrbk; ignore the error code (1) for when the file doesn't exist
+    source /a/bin/bash_unpublished/source-state || [[ $? == 1 ]]
+  fi
    if [[ $MAIL_HOST == "$HOSTNAME" ]]; then
      bbkmsg=
      if [[ $(systemctl is-active btrbk.timer) != active ]]; then
@@ -304,10 +319,14 @@ write-status() {
      lo -1 old-snapshot $snapshotmsg
    fi
  
-  cat /a/bin/bash_unpublished/source-state >$status_file
+  if [[ ! -e $status_file || -w $status_file ]]; then
+    if [[ -e /a/bin/bash_unpublished/source-state ]]; then
+      cat /a/bin/bash_unpublished/source-state >$status_file
+    fi
  
-  if [[ ${chars[*]} ]]; then
-    echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file
+    if [[ ${chars[*]} ]]; then
+      echo "ps_char=\"${chars[*]} \$ps_char\"" >>$status_file
+    fi
    fi
  
  }
author	Ian Kelling <ian@iankelling.org>
	Fri, 25 Mar 2022 06:25:24 +0000 (02:25 -0400)
committer	Ian Kelling <ian@iankelling.org>
	Fri, 25 Mar 2022 06:25:24 +0000 (02:25 -0400)
bitfolk-chroot-install	[new file with mode: 0755]	patch \| blob
bitfolk-rescue-init	[new file with mode: 0644]	patch \| blob
bitfolk-rescue-install	[new file with mode: 0755]	patch \| blob
bk-backup		patch \| blob \| history
brc		patch \| blob \| history
brc2		patch \| blob \| history
btrfsmaint		patch \| blob \| history
check-remote-mailqs		patch \| blob \| history
conflink		patch \| blob \| history
distro-begin		patch \| blob \| history
distro-end		patch \| blob \| history
dynamic-ip-update		patch \| blob \| history
filesystem/etc/default/prometheus		patch \| blob \| history
filesystem/etc/default/prometheus-alertmanager		patch \| blob \| history
filesystem/etc/prometheus/alertmanager_templates/iank.tmpl	[new file with mode: 0644]	patch \| blob
filesystem/etc/prometheus/file_sd/node.yml		patch \| blob \| history
filesystem/etc/prometheus/file_sd/tlsnode.yml		patch \| blob \| history
filesystem/etc/prometheus/rules/iank.yml		patch \| blob \| history
filesystem/etc/systemd/system/btrfsmaintstop.service		patch \| blob \| history
filesystem/etc/systemd/system/dynamicipupdate.service		patch \| blob \| history
filesystem/etc/systemd/system/epanicclean.service		patch \| blob \| history
filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf	[new file with mode: 0644]	patch \| blob
filesystem/etc/systemd/system/prometheus.d/restart.conf	[new file with mode: 0644]	patch \| blob
filesystem/etc/systemd/system/systemstatus.service		patch \| blob \| history
filesystem/usr/local/bin/myterm	[new file with mode: 0755]	patch \| blob
filesystem/usr/local/bin/myupgrade		patch \| blob \| history
i3-sway/common.conf		patch \| blob \| history
install-my-scripts		patch \| blob \| history
machine_specific/kd/filesystem/etc/default/prometheus-node-exporter	[moved from filesystem/etc/default/prometheus-node-exporter with 100% similarity]	patch \| blob \| history
mail-setup		patch \| blob \| history
mount-latest-subvol		patch \| blob \| history
pkgs		patch \| blob \| history
primary-setup		patch \| blob \| history
rootsshsync		patch \| blob \| history
subdir_files/.config/i3/config		patch \| blob \| history
subdir_files/.config/sakura/sakura.conf		patch \| blob \| history
subdir_files/.config/sway/config		patch \| blob \| history
system-status		patch \| blob \| history