mostly fixes and improvements

author Ian Kelling <ian@iankelling.org>

Wed, 20 Apr 2022 01:32:12 +0000 (21:32 -0400)

committer Ian Kelling <ian@iankelling.org>

Wed, 20 Apr 2022 01:32:12 +0000 (21:32 -0400)
author Ian Kelling <ian@iankelling.org>
Wed, 20 Apr 2022 01:32:12 +0000 (21:32 -0400)
committer Ian Kelling <ian@iankelling.org>
Wed, 20 Apr 2022 01:32:12 +0000 (21:32 -0400)
diff --git a/brc b/brc

index 502be7e4e03a3f768c90bb08e6aad34cccb38492..733a7c4482529204c9d067806c993d5e80954af1 100644 (file)
--- a/brc
+++ b/brc
@@ -2027,7 +2027,7 @@ if [[ $- == *i* ]]; then
      if [[ -e /dev/shm/iank-status && ! -e /tmp/quiet-status ]]; then
        eval $(< /dev/shm/iank-status)
      fi
-    if [[ ! $SSH_CLIENT && $MAIL_HOST != "$HOSTNAME" ]]; then
+    if [[ $MAIL_HOST && $MAIL_HOST != "$HOSTNAME" ]]; then
        ps_char="@ $ps_char"
      fi
      # We could test if sudo is active with sudo -nv
diff --git a/distro-end b/distro-end

index 6e86d21a8487225e12b2cf82f0cdab43271cf56b..434ca2aec0a5e0f923d773e1ac1f4b515e04f72f 100755 (executable)
--- a/distro-end
+++ b/distro-end
@@ -1403,6 +1403,7 @@ tu /etc/schroot/desktop/fstab <<'EOF'
  /run/user/0    /run/user/0     none    rw,bind         0       0
  EOF
  
+# todo: consider if this should use the new sysd-prom-fail
  sd /etc/systemd/system/schrootupdate.service <<'EOF'
  [Unit]
  Description=schrootupdate
@@ -1888,8 +1889,7 @@ esac
  
  case $HOSTNAME in
    kd)
-    # ive got these + a needed dependency pinned to bullseye, just to get
-    # versions more in line with the main docs.
+    /a/bin/buildscripts/prometheus
      # Font awesome is needed for the alertmanager ui.
      pi prometheus-alertmanager prometheus prometheus-node-exporter fonts-font-awesome
      web-conf -p 9091 -f 9090 - apache2 i.b8.nz <<'EOF'
@@ -1902,6 +1902,18 @@ AuthUserFile "/etc/prometheus-htpasswd"
  Require valid-user
  </Location>
  EOF
+
+    web-conf -p 9094 -f 9093 - apache2 i.b8.nz <<'EOF'
+<Location "/">
+AuthType Basic
+AuthName "basic_auth"
+# created with
+# htpasswd -c prometheus-htpasswd USERNAME
+AuthUserFile "/etc/prometheus-htpasswd"
+Require valid-user
+</Location>
+EOF
+
      # By default, the alertmanager web UI is not enabled, other than a page
      # that suggests using the amtool CLI. That tool is good, but you can't
      # silence things nearly as fast with it.
@@ -1916,31 +1928,6 @@ EOF
        sysd-prom-fail-install $ser
      done
  
-    ## get upstream because it has the react ui, which has localtime, and general better usability.
-    ## begin get latest upstream prometheus ###
-    cd /a/opt/promdl
-    url=$(curl -s https://api.github.com/repos/prometheus/prometheus/releases/latest | jq -r '.assets[].browser_download_url | match(".*linux-amd64.tar.gz$").string')
-    f=${url##*/}
-    if [[ -e $f ]]; then
-      timestamp=$(stat -c %Y $f)
-    else
-      timestamp=0
-    fi
-    m wget -nv -N $url
-    new_timestamp=$(stat -c %Y $f)
-    if [[ $timestamp != $new_timestamp || ! -e /usr/local/bin/prometheus ]]; then
-      ngset
-      to_rm=( !($f) )
-      ngreset
-      if (( ${#to_rm[@]} )); then
-        rm -rf ${to_rm[@]}
-      fi
-      m ex $f
-      dir=${f%.tar.gz}
-      s install $dir/prometheus $dir/promtool /usr/local/bin
-    fi
-    ## end get latest upstream prometheus ###
-
      ;;
    *)
      pi prometheus-node-exporter
@@ -1969,8 +1956,7 @@ Require valid-user
  </Location>
  EOF
      # For work, i think we will just use the firewall for hosts in the main data center, and
-    # apache/nginx + tls + basic auth outside of it. or consider stunnel.
-
+    # vpn for hosts outside it.
  
      # TODO: figure out how to detect the ping failure and try again.
  
diff --git a/dynamic-ip-update b/dynamic-ip-update

index b7c60f520eb23904c7684414d735f7bd8b91e520..f377293e900b2987609587f3be1710330c1e30e2 100755 (executable)
--- a/dynamic-ip-update
+++ b/dynamic-ip-update
@@ -11,13 +11,16 @@ main() {
  
    up4=false
  
-  if ! read -r _ _  gateway _ ifdev _ < <(ip -4 route get 85.119.83.50 2>/dev/null); then
-    # if our internet is down, just give up, no need to have an error
-    if [[ ! $INVOCATION_ID ]]; then
+  if ! tmp=$(ip -4 route get 85.119.83.50 2>/dev/null); then
+    # our internet is down
+    if [[ $INVOCATION_ID ]]; then
+      return 0
+    else
        echo $0: failed to get route, giving up
+      exit 0
      fi
-    exit 0
    fi
+  read -r _ _  gateway _ ifdev _ <<<"$tmp"
  
    case $gateway in
      10.2.0.1)
diff --git a/filesystem/etc/default/prometheus-alertmanager b/filesystem/etc/default/prometheus-alertmanager

index b77df7471e5c4df2e210120e5ced4a1e64c2bfef..0c36d690d0eca477d4269c28e2d9a8a3a6109cae 100644 (file)
--- a/filesystem/etc/default/prometheus-alertmanager
+++ b/filesystem/etc/default/prometheus-alertmanager
@@ -10,70 +10,78 @@
  # it doesn't wait for network.target, and gives this error message:
  #  component=cluster err="couldn't deduce an advertise address: no private IP found, explicit advertise addr not provided"
  
+# config.file and storage.path are set to match the debian package
  
-ARGS="--cluster.listen-address= --web.listen-address=127.0.0.1:9093"
+ARGS="--cluster.listen-address=
+--config.file=/etc/prometheus/alertmanager.yml
+--storage.path=/var/lib/prometheus/alertmanager/
+--web.listen-address=127.0.0.1:9093"
  
  # this file is from version 0.21
  
  # The alert manager supports the following options:
  
-#  --config.file="/etc/prometheus/alertmanager.yml"
-#       Alertmanager configuration file name.
-#  --storage.path="/var/lib/prometheus/alertmanager/"
-#       Base path for data storage.
-#  --data.retention=120h
-#       How long to keep data for.
-#  --alerts.gc-interval=30m
-#       Interval between alert GC.
-#  --log.level=info
-#       Only log messages with the given severity or above.
-#  --web.external-url=WEB.EXTERNAL-URL
-#       The URL under which Alertmanager is externally reachable (for example,
-#       if Alertmanager is served via a reverse proxy). Used for generating
-#       relative and absolute links back to Alertmanager itself. If the URL has
-#       a path portion, it will be used to prefix all HTTP endpoints served by
-#       Alertmanager. If omitted, relevant URL components will be derived
-#       automatically.
-#  --web.route-prefix=WEB.ROUTE-PREFIX
-#       Prefix for the internal routes of web endpoints. Defaults to path of
-#       --web.external-url.
-#  --web.listen-address=":9093"
-#       Address to listen on for the web interface and API.
-#  --web.ui-path="/usr/share/prometheus/alertmanager/ui/"
-#       Path to static UI directory.
-#  --template.default="/usr/share/prometheus/alertmanager/default.tmpl"
-#       Path to default notification template.
-#  --cluster.listen-address="0.0.0.0:9094"
-#       Listen address for cluster.
-#  --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
-#       Explicit address to advertise in cluster.
-#  --cluster.peer=CLUSTER.PEER ...
-#       Initial peers (may be repeated).
-#  --cluster.peer-timeout=15s
-#       Time to wait between peers to send notifications.
-#  --cluster.gossip-interval=200ms
-#       Interval between sending gossip messages. By lowering this value (more
-#       frequent) gossip messages are propagated across the cluster more
-#       quickly at the expense of increased bandwidth.
-#  --cluster.pushpull-interval=1m0s
-#       Interval for gossip state syncs. Setting this interval lower (more
-#       frequent) will increase convergence speeds across larger clusters at
-#       the expense of increased bandwidth usage.
-#  --cluster.tcp-timeout=10s  Timeout for establishing a stream connection
-#       with a remote node for a full state sync, and for stream read and write
-#       operations.
-#  --cluster.probe-timeout=500ms
-#       Timeout to wait for an ack from a probed node before assuming it is
-#       unhealthy. This should be set to 99-percentile of RTT (round-trip time)
-#       on your network.
-#  --cluster.probe-interval=1s
-#       Interval between random node probes. Setting this lower (more frequent)
-#       will cause the cluster to detect failed nodes more quickly at the
-#       expense of increased bandwidth usage.
-#  --cluster.settle-timeout=1m0s
-#       Maximum time to wait for cluster connections to settle before
-#       evaluating notifications.
-#  --cluster.reconnect-interval=10s
-#       Interval between attempting to reconnect to lost peers.
-#  --cluster.reconnect-timeout=6h0m0s
-#       Length of time to attempt to reconnect to a lost peer.
+
+# --config.file="alertmanager.yml"
+#                            Alertmanager configuration file name.
+# --storage.path="data/"     Base path for data storage.
+# --data.retention=120h      How long to keep data for.
+# --alerts.gc-interval=30m   Interval between alert GC.
+# --web.external-url=WEB.EXTERNAL-URL
+#                            The URL under which Alertmanager is externally reachable (for
+#                            example, if Alertmanager is served via a reverse proxy). Used
+#                            for generating relative and absolute links back to
+#                            Alertmanager itself. If the URL has a path portion, it will
+#                            be used to prefix all HTTP endpoints served by Alertmanager.
+#                            If omitted, relevant URL components will be derived
+#                            automatically.
+# --web.route-prefix=WEB.ROUTE-PREFIX
+#                            Prefix for the internal routes of web endpoints. Defaults to
+#                            path of --web.external-url.
+# --web.listen-address=":9093"
+#                            Address to listen on for the web interface and API.
+# --web.get-concurrency=0    Maximum number of GET requests processed concurrently. If
+#                            negative or zero, the limit is GOMAXPROC or 8, whichever is
+#                            larger.
+# --web.timeout=0            Timeout for HTTP requests. If negative or zero, no timeout is
+#                            set.
+# --cluster.listen-address="0.0.0.0:9094"
+#                            Listen address for cluster. Set to empty string to disable HA
+#                            mode.
+# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
+#                            Explicit address to advertise in cluster.
+# --cluster.peer=CLUSTER.PEER ...
+#                            Initial peers (may be repeated).
+# --cluster.peer-timeout=15s
+#                            Time to wait between peers to send notifications.
+# --cluster.gossip-interval=200ms
+#                            Interval between sending gossip messages. By lowering this
+#                            value (more frequent) gossip messages are propagated across
+#                            the cluster more quickly at the expense of increased
+#                            bandwidth.
+# --cluster.pushpull-interval=1m0s
+#                            Interval for gossip state syncs. Setting this interval lower
+#                            (more frequent) will increase convergence speeds across
+#                            larger clusters at the expense of increased bandwidth usage.
+# --cluster.tcp-timeout=10s  Timeout for establishing a stream connection with a remote
+#                            node for a full state sync, and for stream read and write
+#                            operations.
+# --cluster.probe-timeout=500ms
+#                            Timeout to wait for an ack from a probed node before assuming
+#                            it is unhealthy. This should be set to 99-percentile of RTT
+#                            (round-trip time) on your network.
+# --cluster.probe-interval=1s
+#                            Interval between random node probes. Setting this lower (more
+#                            frequent) will cause the cluster to detect failed nodes more
+#                            quickly at the expense of increased bandwidth usage.
+# --cluster.settle-timeout=1m0s
+#                            Maximum time to wait for cluster connections to settle before
+#                            evaluating notifications.
+# --cluster.reconnect-interval=10s
+#                            Interval between attempting to reconnect to lost peers.
+# --cluster.reconnect-timeout=6h0m0s
+#                            Length of time to attempt to reconnect to a lost peer.
+# --log.level=info           Only log messages with the given severity or above. One of:
+#                            [debug, info, warn, error]
+# --log.format=logfmt        Output format of log messages. One of: [logfmt, json]
+# --version                  Show application version.
diff --git a/filesystem/etc/systemd/logind.conf.d/iank.conf b/filesystem/etc/systemd/logind.conf.d/iank.conf

index 0ddbf9f444a5c74b9426749ea96d01d62813f089..77ced7d41c9527147bbea3553416d7d8d86af0bc 100644 (file)
--- a/filesystem/etc/systemd/logind.conf.d/iank.conf
+++ b/filesystem/etc/systemd/logind.conf.d/iank.conf
@@ -1,4 +1,7 @@
  # See logind.conf(5) for details.
+
+# A version of this file is duplicated in fai in order to get the bootstrap
+# distro to stop suspending when the lid is closed.
  [Login]
  HandleLidSwitch=ignore
  # seems like a good idea.
diff --git a/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf b/filesystem/etc/systemd/system/prometheus-alertmanager.service.d/override.conf

similarity index 82%

rename from filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf

rename to filesystem/etc/systemd/system/prometheus-alertmanager.service.d/override.conf

index 403672ea2a0839550b0535597ccf40a6e595b8f1..0f8f2a9ccbe29e987ab6a2cfb360a154e7353f7d 100644 (file)
--- a/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf
+++ b/filesystem/etc/systemd/system/prometheus-alertmanager.service.d/override.conf
@@ -9,3 +9,7 @@ StartLimitIntervalSec=0
  Restart=always
  # time to sleep before restarting a service
  RestartSec=300
+
+# An empty ExecStart= resets the existing command list, so the next line replaces it.
+ExecStart=
+ExecStart=/usr/local/bin/alertmanager $ARGS
diff --git a/filesystem/usr/local/bin/myupgrade b/filesystem/usr/local/bin/myupgrade

index 32dd7fffddb13b2c1d9c38a63bfad22cdf90e5c3..7973d670fb707d6817a950cf1996242dbff4daf2 100755 (executable)
--- a/filesystem/usr/local/bin/myupgrade
+++ b/filesystem/usr/local/bin/myupgrade
@@ -48,6 +48,13 @@ myreboot() {
  }
  
  
+case $HOSTNAME in
+  kd)
+    /a/bin/buildscripts/prometheus
+    ;;
+esac
+
+
  # TODO: when executed from cron, this doesn't send an email when we reboot,
  # because rebooting from the script stops the ability to send email.
  # We should figure out some workaround.
diff --git a/filesystem/usr/local/bin/myupgrade-iank b/filesystem/usr/local/bin/myupgrade-iank

index ea610ab89911d0ac5fa356fa1d93eb3ec5ce10d4..e93d63ebc21b663f24c4b730194f5929908b8690 100755 (executable)
--- a/filesystem/usr/local/bin/myupgrade-iank
+++ b/filesystem/usr/local/bin/myupgrade-iank
@@ -33,6 +33,7 @@ if $has_x; then
  fi
  
  
+
  # source /a/bin/distro-setup/path-add-function
  # export GOPATH=$HOME/go
  # path-add $GOPATH/bin
diff --git a/mail-setup b/mail-setup

index effa66597a49db2dc915bfaea126972bba8e1ba5..ea2b1f25ec13763495629935b965799c5e8d044a 100755 (executable)
--- a/mail-setup
+++ b/mail-setup
@@ -3,6 +3,9 @@
  # Copyright (C) 2019 Ian Kelling
  # SPDX-License-Identifier: AGPL-3.0-or-later
  
+
+# todo: install new alertmanager, like new prometheus
+
  # todo: setup a logrotate for /var/log/mymain and mypanic
  
  # todo: setup an alert for bouncing test emails.
diff --git a/schrootupdate b/schrootupdate

index 533e087d010fdf47dd73a4a63b44d49449f3439f..8d3d1e9ced18a9e8ba630197e7b94e72baa5472f 100755 (executable)
--- a/schrootupdate
+++ b/schrootupdate
@@ -12,3 +12,30 @@ for n in bullseye; do
      schroot -c $n -- apt-get -y dist-upgrade --purge --auto-remove
    fi
  done
+
+# Nothing more to do if the bullseye bootstrap subvolume does not exist (we haven't upgraded yet).
+if [[ ! -d /mnt/boot/debianbullseye_bootstrap ]]; then
+  exit 0
+fi
+
+dev=$(awk '$2 == "/mnt/boot" {print $1}' /etc/mtab)
+if [[ ! $dev ]]; then
+  exit 0
+fi
+mkdir -p /mnt/tmptimer
+mount -o subvol=debianbullseye_bootstrap $dev /mnt/tmptimer
+cd /mnt/tmptimer
+for d in dev proc sys dev/pts; do
+  [[ -d $d ]]
+  if ! mountpoint $d &>/dev/null; then
+    mount -o bind /$d $d
+  fi
+done
+chroot . apt-get -y update
+chroot . apt-get -y dist-upgrade --purge --auto-remove
+
+for d in dev/pts dev proc sys; do
+  if mountpoint $d &>/dev/null; then
+    umount $d
+  fi
+done
diff --git a/subdir_files/.gnupg/gpg.conf b/subdir_files/.gnupg/gpg.conf

index ea280385c431fc0754baf45eb93359bbfcb01d30..0dbd8961613dcf4aade21ac4d553f19ba19b9137 100644 (file)
--- a/subdir_files/.gnupg/gpg.conf
+++ b/subdir_files/.gnupg/gpg.conf
@@ -39,8 +39,8 @@ default-key B125F60B7B287FF6A2B7DF8F170AF0E2954295DF
  # DO NOT USE THIS ONE.
  #keyserver hkp://pool.sks-keyservers.net
  
-#keyserver hkp://keys.openpgp.org
-keyserver hkp://pgp.mit.edu
+keyserver hkp://keys.openpgp.org
+#keyserver hkp://pgp.mit.edu
  #keyserver hkp://keyserver.pgp.com
  #keyserver hkp://ipv4.pool.sks-keyservers.net
  #keyserver hkp://keys.gnupg.net
author	Ian Kelling <ian@iankelling.org>
	Wed, 20 Apr 2022 01:32:12 +0000 (21:32 -0400)
committer	Ian Kelling <ian@iankelling.org>
	Wed, 20 Apr 2022 01:32:12 +0000 (21:32 -0400)
brc		patch \| blob \| history
distro-end		patch \| blob \| history
dynamic-ip-update		patch \| blob \| history
filesystem/etc/default/prometheus-alertmanager		patch \| blob \| history
filesystem/etc/systemd/logind.conf.d/iank.conf		patch \| blob \| history
filesystem/etc/systemd/system/prometheus-alertmanager.service.d/override.conf	[moved from filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf with 82% similarity]	patch \| blob \| history
filesystem/usr/local/bin/myupgrade		patch \| blob \| history
filesystem/usr/local/bin/myupgrade-iank		patch \| blob \| history
mail-setup		patch \| blob \| history
schrootupdate		patch \| blob \| history
subdir_files/.gnupg/gpg.conf		patch \| blob \| history