From: Ian Kelling Date: Wed, 20 Apr 2022 01:32:12 +0000 (-0400) Subject: mostly fixes and improvements X-Git-Url: https://iankelling.org/git/?a=commitdiff_plain;h=2a1cee2e73d9291dde9af831bbe9e996199b7cbc;p=distro-setup mostly fixes and improvements --- diff --git a/brc b/brc index 502be7e..733a7c4 100644 --- a/brc +++ b/brc @@ -2027,7 +2027,7 @@ if [[ $- == *i* ]]; then if [[ -e /dev/shm/iank-status && ! -e /tmp/quiet-status ]]; then eval $(< /dev/shm/iank-status) fi - if [[ ! $SSH_CLIENT && $MAIL_HOST != "$HOSTNAME" ]]; then + if [[ $MAIL_HOST && $MAIL_HOST != "$HOSTNAME" ]]; then ps_char="@ $ps_char" fi # We could test if sudo is active with sudo -nv diff --git a/distro-end b/distro-end index 6e86d21..434ca2a 100755 --- a/distro-end +++ b/distro-end @@ -1403,6 +1403,7 @@ tu /etc/schroot/desktop/fstab <<'EOF' /run/user/0 /run/user/0 none rw,bind 0 0 EOF +# todo: consider if this should use the new sysd-prom-fail sd /etc/systemd/system/schrootupdate.service <<'EOF' [Unit] Description=schrootupdate @@ -1888,8 +1889,7 @@ esac case $HOSTNAME in kd) - # ive got these + a needed dependency pinned to bullseye, just to get - # versions more in line with the main docs. + /a/bin/buildscripts/prometheus # Font awesome is needed for the alertmanager ui. pi prometheus-alertmanager prometheus prometheus-node-exporter fonts-font-awesome web-conf -p 9091 -f 9090 - apache2 i.b8.nz <<'EOF' @@ -1902,6 +1902,18 @@ AuthUserFile "/etc/prometheus-htpasswd" Require valid-user EOF + + web-conf -p 9094 -f 9093 - apache2 i.b8.nz <<'EOF' + +AuthType Basic +AuthName "basic_auth" +# created with +# htpasswd -c prometheus-htpasswd USERNAME +AuthUserFile "/etc/prometheus-htpasswd" +Require valid-user + +EOF + # by default, the alertmanager web ui is not enabled other than a page # that suggests to use the amtool cli. that tool is good, but you cant # silence things nearly as fast. @@ -1916,31 +1928,6 @@ EOF sysd-prom-fail-install $ser done - ## get upstream because it has the react ui, which has localtime, and general better usability. - ## begin get latest upstream prometheus ### - cd /a/opt/promdl - url=$(curl -s https://api.github.com/repos/prometheus/prometheus/releases/latest | jq -r '.assets[].browser_download_url | match(".*linux-amd64.tar.gz$").string') - f=${url##*/} - if [[ -e $f ]]; then - timestamp=$(stat -c %Y $f) - else - timestamp=0 - fi - m wget -nv -N $url - new_timestamp=$(stat -c %Y $f) - if [[ $timestamp != $new_timestamp || ! -e /usr/local/bin/prometheus ]]; then - ngset - to_rm=( !($f) ) - ngreset - if (( ${#to_rm[@]} )); then - rm -rf ${to_rm[@]} - fi - m ex $f - dir=${f%.tar.gz} - s install $dir/prometheus $dir/promtool /usr/local/bin - fi - ## end get latest upstream prometheus ### - ;; *) pi prometheus-node-exporter @@ -1969,8 +1956,7 @@ Require valid-user EOF # For work, i think we will just use the firewall for hosts in the main data center, and - # apache/nginx + tls + basic auth outside of it. or consider stunnel. - + # vpn for hosts outside it. # TODO: figure out how to detect the ping failure and try again. diff --git a/dynamic-ip-update b/dynamic-ip-update index b7c60f5..f377293 100755 --- a/dynamic-ip-update +++ b/dynamic-ip-update @@ -11,13 +11,16 @@ main() { up4=false - if ! read -r _ _ gateway _ ifdev _ < <(ip -4 route get 85.119.83.50 2>/dev/null); then - # if our internet is down, just give up, no need to have an error - if [[ ! $INVOCATION_ID ]]; then + if ! tmp=$(ip -4 route get 85.119.83.50 2>/dev/null); then + # our internet is down + if [[ $INVOCATION_ID ]]; then + return 0 + else echo $0: failed to get route, giving up + exit 0 fi - exit 0 fi + read -r _ _ gateway _ ifdev _ <<<"$tmp" case $gateway in 10.2.0.1) diff --git a/filesystem/etc/default/prometheus-alertmanager b/filesystem/etc/default/prometheus-alertmanager index b77df74..0c36d69 100644 --- a/filesystem/etc/default/prometheus-alertmanager +++ b/filesystem/etc/default/prometheus-alertmanager @@ -10,70 +10,78 @@ # it doesn't wait for network.target, and gives this error message: # component=cluster err="couldn't deduce an advertise address: no private IP found, explicit advertise addr not provided" +# config.file and storage.path are set to match the debian package -ARGS="--cluster.listen-address= --web.listen-address=127.0.0.1:9093" +ARGS="--cluster.listen-address= +--config.file=/etc/prometheus/alertmanager.yml +--storage.path=/var/lib/prometheus/alertmanager/ +--web.listen-address=127.0.0.1:9093" # this file is from version 0.21 # The alert manager supports the following options: -# --config.file="/etc/prometheus/alertmanager.yml" -# Alertmanager configuration file name. -# --storage.path="/var/lib/prometheus/alertmanager/" -# Base path for data storage. -# --data.retention=120h -# How long to keep data for. -# --alerts.gc-interval=30m -# Interval between alert GC. -# --log.level=info -# Only log messages with the given severity or above. -# --web.external-url=WEB.EXTERNAL-URL -# The URL under which Alertmanager is externally reachable (for example, -# if Alertmanager is served via a reverse proxy). Used for generating -# relative and absolute links back to Alertmanager itself. If the URL has -# a path portion, it will be used to prefix all HTTP endpoints served by -# Alertmanager. If omitted, relevant URL components will be derived -# automatically. -# --web.route-prefix=WEB.ROUTE-PREFIX -# Prefix for the internal routes of web endpoints. Defaults to path of -# --web.external-url. -# --web.listen-address=":9093" -# Address to listen on for the web interface and API. -# --web.ui-path="/usr/share/prometheus/alertmanager/ui/" -# Path to static UI directory. -# --template.default="/usr/share/prometheus/alertmanager/default.tmpl" -# Path to default notification template. -# --cluster.listen-address="0.0.0.0:9094" -# Listen address for cluster. -# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS -# Explicit address to advertise in cluster. -# --cluster.peer=CLUSTER.PEER ... -# Initial peers (may be repeated). -# --cluster.peer-timeout=15s -# Time to wait between peers to send notifications. -# --cluster.gossip-interval=200ms -# Interval between sending gossip messages. By lowering this value (more -# frequent) gossip messages are propagated across the cluster more -# quickly at the expense of increased bandwidth. -# --cluster.pushpull-interval=1m0s -# Interval for gossip state syncs. Setting this interval lower (more -# frequent) will increase convergence speeds across larger clusters at -# the expense of increased bandwidth usage. -# --cluster.tcp-timeout=10s Timeout for establishing a stream connection -# with a remote node for a full state sync, and for stream read and write -# operations. -# --cluster.probe-timeout=500ms -# Timeout to wait for an ack from a probed node before assuming it is -# unhealthy. This should be set to 99-percentile of RTT (round-trip time) -# on your network. -# --cluster.probe-interval=1s -# Interval between random node probes. Setting this lower (more frequent) -# will cause the cluster to detect failed nodes more quickly at the -# expense of increased bandwidth usage. -# --cluster.settle-timeout=1m0s -# Maximum time to wait for cluster connections to settle before -# evaluating notifications. -# --cluster.reconnect-interval=10s -# Interval between attempting to reconnect to lost peers. -# --cluster.reconnect-timeout=6h0m0s -# Length of time to attempt to reconnect to a lost peer. + +# --config.file="alertmanager.yml" +# Alertmanager configuration file name. +# --storage.path="data/" Base path for data storage. +# --data.retention=120h How long to keep data for. +# --alerts.gc-interval=30m Interval between alert GC. +# --web.external-url=WEB.EXTERNAL-URL +# The URL under which Alertmanager is externally reachable (for +# example, if Alertmanager is served via a reverse proxy). Used +# for generating relative and absolute links back to +# Alertmanager itself. If the URL has a path portion, it will +# be used to prefix all HTTP endpoints served by Alertmanager. +# If omitted, relevant URL components will be derived +# automatically. +# --web.route-prefix=WEB.ROUTE-PREFIX +# Prefix for the internal routes of web endpoints. Defaults to +# path of --web.external-url. +# --web.listen-address=":9093" +# Address to listen on for the web interface and API. +# --web.get-concurrency=0 Maximum number of GET requests processed concurrently. If +# negative or zero, the limit is GOMAXPROC or 8, whichever is +# larger. +# --web.timeout=0 Timeout for HTTP requests. If negative or zero, no timeout is +# set. +# --cluster.listen-address="0.0.0.0:9094" +# Listen address for cluster. Set to empty string to disable HA +# mode. +# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS +# Explicit address to advertise in cluster. +# --cluster.peer=CLUSTER.PEER ... +# Initial peers (may be repeated). +# --cluster.peer-timeout=15s +# Time to wait between peers to send notifications. +# --cluster.gossip-interval=200ms +# Interval between sending gossip messages. By lowering this +# value (more frequent) gossip messages are propagated across +# the cluster more quickly at the expense of increased +# bandwidth. +# --cluster.pushpull-interval=1m0s +# Interval for gossip state syncs. Setting this interval lower +# (more frequent) will increase convergence speeds across +# larger clusters at the expense of increased bandwidth usage. +# --cluster.tcp-timeout=10s Timeout for establishing a stream connection with a remote +# node for a full state sync, and for stream read and write +# operations. +# --cluster.probe-timeout=500ms +# Timeout to wait for an ack from a probed node before assuming +# it is unhealthy. This should be set to 99-percentile of RTT +# (round-trip time) on your network. +# --cluster.probe-interval=1s +# Interval between random node probes. Setting this lower (more +# frequent) will cause the cluster to detect failed nodes more +# quickly at the expense of increased bandwidth usage. +# --cluster.settle-timeout=1m0s +# Maximum time to wait for cluster connections to settle before +# evaluating notifications. +# --cluster.reconnect-interval=10s +# Interval between attempting to reconnect to lost peers. +# --cluster.reconnect-timeout=6h0m0s +# Length of time to attempt to reconnect to a lost peer. +# --log.level=info Only log messages with the given severity or above. One of: +# [debug, info, warn, error] +# --log.format=logfmt Output format of log messages. One of: [logfmt, json] +# --version Show application version. diff --git a/filesystem/etc/systemd/logind.conf.d/iank.conf b/filesystem/etc/systemd/logind.conf.d/iank.conf index 0ddbf9f..77ced7d 100644 --- a/filesystem/etc/systemd/logind.conf.d/iank.conf +++ b/filesystem/etc/systemd/logind.conf.d/iank.conf @@ -1,4 +1,7 @@ # See logind.conf(5) for details. + +# A version of this file is duplicated in fai in order to get the bootstrap +# distro to stop suspending when the lid is closed. [Login] HandleLidSwitch=ignore # seems like a good idea. diff --git a/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf b/filesystem/etc/systemd/system/prometheus-alertmanager.service.d/override.conf similarity index 82% rename from filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf rename to filesystem/etc/systemd/system/prometheus-alertmanager.service.d/override.conf index 403672e..0f8f2a9 100644 --- a/filesystem/etc/systemd/system/prometheus-alertmanager.d/restart.conf +++ b/filesystem/etc/systemd/system/prometheus-alertmanager.service.d/override.conf @@ -9,3 +9,7 @@ StartLimitIntervalSec=0 Restart=always # time to sleep before restarting a service RestartSec=300 + +# empty signifies to replace the existing value +ExecStart= +ExecStart=/usr/local/bin/alertmanager $ARGS diff --git a/filesystem/usr/local/bin/myupgrade b/filesystem/usr/local/bin/myupgrade index 32dd7ff..7973d67 100755 --- a/filesystem/usr/local/bin/myupgrade +++ b/filesystem/usr/local/bin/myupgrade @@ -48,6 +48,13 @@ myreboot() { } +case $HOSTNAME in + kd) + /a/bin/buildscripts/prometheus + ;; +esac + + # TODO: executed from cron, this doesnt sent an email when we reboot, # because rebooting from the script stops the ability to send email. # We should figure some workaround. diff --git a/filesystem/usr/local/bin/myupgrade-iank b/filesystem/usr/local/bin/myupgrade-iank index ea610ab..e93d63e 100755 --- a/filesystem/usr/local/bin/myupgrade-iank +++ b/filesystem/usr/local/bin/myupgrade-iank @@ -33,6 +33,7 @@ if $has_x; then fi + # source /a/bin/distro-setup/path-add-function # export GOPATH=$HOME/go # path-add $GOPATH/bin diff --git a/mail-setup b/mail-setup index effa665..ea2b1f2 100755 --- a/mail-setup +++ b/mail-setup @@ -3,6 +3,9 @@ # Copyright (C) 2019 Ian Kelling # SPDX-License-Identifier: AGPL-3.0-or-later + +# todo: install new alertmanager, like new prometheus + # todo: setup a logrotate for /var/log/mymain and mypanic # todo: setup an alert for bouncing test emails. diff --git a/schrootupdate b/schrootupdate index 533e087..8d3d1e9 100755 --- a/schrootupdate +++ b/schrootupdate @@ -12,3 +12,30 @@ for n in bullseye; do schroot -c $n -- apt-get -y dist-upgrade --purge --auto-remove fi done + +# if we haven't upgraded yet +if [[ ! -d /mnt/boot/debianbullseye_bootstrap ]]; then + exit 0 +fi + +dev=$(awk '$2 == "/mnt/boot" {print $1}' /etc/mtab) +if [[ ! $dev ]]; then + exit 0 +fi +mkdir -p /mnt/tmptimer +mount -o subvol=debianbullseye_bootstrap $dev /mnt/tmptimer +cd /mnt/tmptimer +for d in dev proc sys dev/pts; do + [[ -d $d ]] + if ! mountpoint $d &>/dev/null; then + mount -o bind /$d $d + fi +done +chroot . apt-get -y update +chroot . apt-get -y dist-upgrade --purge --auto-remove + +for d in dev/pts dev proc sys; do + if mountpoint $d &>/dev/null; then + umount $d + fi +done diff --git a/subdir_files/.gnupg/gpg.conf b/subdir_files/.gnupg/gpg.conf index ea28038..0dbd896 100644 --- a/subdir_files/.gnupg/gpg.conf +++ b/subdir_files/.gnupg/gpg.conf @@ -39,8 +39,8 @@ default-key B125F60B7B287FF6A2B7DF8F170AF0E2954295DF # DO NOT USE THIS ONE. #keyserver hkp://pool.sks-keyservers.net -#keyserver hkp://keys.openpgp.org -keyserver hkp://pgp.mit.edu +keyserver hkp://keys.openpgp.org +#keyserver hkp://pgp.mit.edu #keyserver hkp://keyserver.pgp.com #keyserver hkp://ipv4.pool.sks-keyservers.net #keyserver hkp://keys.gnupg.net