From: Ian Kelling Date: Sun, 14 Jul 2024 22:41:36 +0000 (-0400) Subject: hopefully a btrbk fix, misc improvements X-Git-Url: https://iankelling.org/git/?a=commitdiff_plain;h=368b884d608213573d4e97bb7179b4ff5b60d482;p=distro-setup hopefully a btrbk fix, misc improvements --- diff --git a/brc b/brc index 37733f3..8aded5d 100644 --- a/brc +++ b/brc @@ -665,20 +665,27 @@ ccomp time pd # The service is unaffected if our ssh connection dies, no need to run # in screen or tmux. # -# Note: The last few lines of any existing entries for a unit by that -# name will be output first, and there will be a few second delay at the -# start of the command, and a second or so at the end. +# Note: There will be a few second delay at the start of the command, +# and a second or so at the end. # # Note: Functions and aliases obviously won't work, we resolve the # command to a file. # -# Note: requires running as root. +# More comparison to other approaches: systemd-cat: fails if you do +# systemctl daemon-reload, and I've had other approaches. I haven't yet +# really tried logging with script, sudo, or screen. jdo() { - local cmd cmd_name jr_pid ret + local cmd cmd_name jr_pid ret sdrun_args sdrun_prefix ret=0 cmd="$1" shift if [[ $EUID != 0 ]]; then + # note, I don't use system --user because if it does sudo ssh, that + # will leave a process around that we can't kill and it will leave + # the unit hanging around in a failed state needing manual killing + # of the process. + sdrun_prefix=sudo + sdrun_args="--uid "$(id -u)" --gid "$(id -g)" -E SSH_AUTH_SOCK=/run/openssh_agent" echo "jdo: error: rerun as root" return 1 fi @@ -686,13 +693,13 @@ jdo() { if [[ $cmd != /* ]]; then cmd=$(type -P "$cmd") fi + #note date format for since is date '+%F %T' # -q = quiet - journalctl -qn2 -f -u "$cmd_name" & + journalctl --since=now -qn2 -f -u "$cmd_name" & jr_pid=$! - # Trial and error of time needed to avoid missing initial lines. - # .5 was not reliable. 1 was not reliable. 
2 was not reliable - sleep 4 - systemd-run --unit "$cmd_name" --wait --collect "$cmd" "$@" || ret=$? + # todo: trial and error testing of the wait time here. + sleep 1 + $sdrun_prefix systemd-run $sdrun_args --unit "$cmd_name" --wait --collect "$cmd" "$@" || ret=$? # The sleep lets the journal output its last line # before the prompt comes up. sleep .5 @@ -723,6 +730,11 @@ tsf() { # usage: tsl LOG_PATH_PREFIX COMMAND... # example: tsl /root/command # log file will be like /root/command-2024-02-10.log +# +# Caveats: If there is a user prompt, like "read -p prompt var", it will +# hang without outputting the prompt. Sometimes some output will get +# hidden until you hit enter. +# tsl() { local log_prefix log_path appending ret if (( $# < 2 )); then @@ -2000,7 +2012,7 @@ jrfg() { bn _jrfg "$@" } jru() { - SYSTEMD_COLORS=true bn journalctl -n1000 -f -u "$@" ; + journalctl -nall -u "$@" ; } jrug() { _jrug() { diff --git a/brc2 b/brc2 index c4dfdea..c7de4ab 100644 --- a/brc2 +++ b/brc2 @@ -1658,82 +1658,6 @@ order by timeSent;" sqlite3 /p/cheogram/b ".mode tabs" "$q" | sed 's/ /./' | less } -# version of jdo for my non-root user -jdo() { - # comparison of alternative logging methods: - # - # systemd-run command (what this function does) - # - # If there is a user prompt, the program will detect that it is not - # connected to a terminal and act in a non-interactive way, skipping - # the prompt. This has the benefit that you know exactly how the - # program will act if you want to move it into a service that runs - # automatically. - # - # If run with sudo and command is a shell script which does a sleep, - # it can (sometimes?) output some extra whitespace in front of - # messages, more for each subsequent message. This can be avoided by - # becoming root first. - # - # It logs the command's pid and exit code, which is nice. 
- # - # - ### command |& ts | tee file.log - # - # If there is a user prompt, like "read -p prompt var", it will hang - # without outputting the prompt. - # - # I've had a few times where ts had an error and I wasn't totally sure - # if it was really the command or ts having the problem. - # - # Sometimes some output will get hidden until you hit enter. - # - # - ### command |& pee cat logger - # - # This seems to work. I need to test more. - # - # - ### command |& logger -s - # - # User prompts get confusingly prefixed to earlier output, and all log - # entries get prefixed with annoying priority level. - # - # - ### systemd-cat - # - # Had a few problems. One major one is that it exited in the middle of - # a command on systemctl daemon-reload - # - # Related commands which can log a whole session: script, sudo, screen - local cmd cmd_name jr_pid ret - ret=0 - cmd="$1" - shift - cmd_name=${cmd##*/} - if [[ $cmd != /* ]]; then - cmd=$(type -P "$cmd") - fi - #note date format for since is date '+%F %T' - # -q = quiet - journalctl --since=now -qn2 -f -u "$cmd_name" & - jr_pid=$! - # note, we could have a version that does system --user, but if for example - # it does sudo ssh, that will leave a process around that we can't kill - # and it will leave the unit hanging around in a failed state needing manual - # killing of the process. - s systemd-run --uid "$(id -u)" --gid "$(id -g)" \ - -E SSH_AUTH_SOCK=/run/openssh_agent \ - --unit "$cmd_name" --wait --collect "$cmd" "$@" || ret=$? - # The sleep lets the journal output its last line - # before the prompt comes up. - sleep .5 - kill $jr_pid &>/dev/null ||: - unset jr_pid - fg &>/dev/null ||: - # this avoids any err-catch - (( ret == 0 )) || return $ret -} # service run, and watch the output srun() { diff --git a/btrbk-run b/btrbk-run index c8aa989..f7a7c4a 100644 --- a/btrbk-run +++ b/btrbk-run @@ -30,6 +30,10 @@ set -e; . 
/usr/local/lib/bash-bear; set +e shopt -s nullglob +pre=btrbk-run +script_name="${BASH_SOURCE[0]}" +script_name="${script_name##*/}" + usage() { cat <<'EOF' btrbk-run [OPTIONS] [run|resume|archive] @@ -44,16 +48,6 @@ EOF exit $1 } - - -pre=btrbk-run - - - -script_name="${BASH_SOURCE[0]}" -script_name="${script_name##*/}" - - log-setup() { if [[ ! $log_path ]]; then mkdir -p /var/log/btrbk @@ -165,10 +159,9 @@ qconf() { # q has sensitive data i dont want to backup for so long cat >>/etc/btrbk$conf_suf.conf </etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf < 1 && tg_snap_count == orphan_mp_count )) + if (( tmp )) ; then + die "something went wrong checking orphans on $tg: for mountpoint ${mountpoints[$i]}, $orphan_mp_count" + fi + done +} + +# Note, this depends on write-config being called first. +# +# Delete any subvols on the receiving host that don't exist on the +# sending host. Otherwise, the receiving host could have snapshots that +# aren't on the sending side, and thus become odd leaf subvols, and then +# btrbk could try to use them when we sync back, creating a weird tree +# instead of linear parent/child relationship. Maybe this could lead to +# a missing source subvol error, so lets avoid it. 
+del-orphan-snaps() { + if [[ $source ]]; then + tmpstr=$(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}") + IFS=" " read -r -a source_snap_list <<<"$tmpstr" + for snap in "${source_snap_list[@]}"; do + source_snaps[$snap]=t + done + get-orphan-tg-snaps + tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) + if (( tmp )); then + d btrfs sub del ${orphan_tg_snaps[*]} + fi + else # we have targets + for tg in ${targets[@]}; do + tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}") + mapfile -t tg_snaps <<<"$tmp_str" + get-orphan-tg-snaps + tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) + if (( tmp )); then + d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}" + fi + done + fi +} + +#### end functions #### + # latest $MAIL_HOST if [[ -e /b/bash_unpublished/source-state ]]; then source /b/bash_unpublished/source-state @@ -183,6 +351,7 @@ fi # note q is owned by root:1000 +declare -A source_snaps mountpoints=() rsync_mountpoint=/q @@ -192,6 +361,8 @@ ret=0 conf_only=false dry_run=false # mostly for testing rate_limit=no +# -q and just using the syslog option seemed nice, +# but it doesn't show when a send has a parent and when it doesn't. verbose=true; verbose_arg="-l trace" force=false if [[ $PPID == 1 ]]; then @@ -342,9 +513,14 @@ fi std_preserve="36h 14d 8w 24m" q_preserve="18h 14d 8w" +preserve_min=6h +prune=false case $cmd_arg in - run|resume) : ;; + run|resume) + # see notes at the end for why we set this. + prune=true + ;; # This works better than the normal archive command. We have to # specify the mount points, but that is what we are used to doing and @@ -355,9 +531,6 @@ case $cmd_arg in # time s btrbk -v archive /mnt/r7/amy/boot/btrbk ssh://bo/mnt/boot2/btrbk archive) cmd_arg=resume - std_preserve="999h 999d 999w 999m" - q_preserve="$std_preserve" - preserve_arg=-p ;; *) die "untested command arg" ;; esac @@ -680,6 +853,8 @@ df --output=size,pcent / | tail -n1" # This is a separate ssh because the command can fail and thatis ok. if ! 
$force; then locked=false + # note Invalid MIT-MAGIC-COOKIE-1 keyxscreensaver-command: can't open display :0 is expected if + # the system is locked by greeter. if lock_info=$($ssh_timeout ssh $h DISPLAY=:0 xscreensaver-command -time); then if [[ $lock_info != *non-blanked* ]]; then locked=true @@ -713,177 +888,34 @@ df --output=size,pcent / | tail -n1" fi -cat >/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf <>/etc/btrbk$conf_suf.conf < 1 && tg_snap_count == orphan_mp_count )) - if (( tmp )) ; then - die "something went wrong checking orphans on $tg: for mountpoint ${mountpoints[$i]}, $orphan_mp_count" - fi - done -} - -if [[ $source ]]; then - for snap in $(ssh root@$source "shopt -s nullglob; ${snap_list_cmds[*]}"); do - source_snaps[$snap]=t - done - get-orphan-tg-snaps - tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) - if (( tmp )); then - d btrfs sub del ${orphan_tg_snaps[*]} - fi -else # we have targets - for tg in ${targets[@]}; do - tmp_str=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}") - mapfile -t tg_snaps <<<"$tmp_str" - get-orphan-tg-snaps - tmp=$(( ${#orphan_tg_snaps[*]} >= 1 )) - if (( tmp )); then - d ssh root@$tg "btrfs sub del ${orphan_tg_snaps[*]}" - fi - done -fi - # todo: umount first to ensure we don't have any errors # todo: do some kill fuser stuff to make umount more reliable - -if $conf_only; then - mexit 0 +if $prune; then + preserve_min_this_run="$preserve_min" + write-config + # I'd have to do a bit more thinking, but maybe doing this here helps + # prune to decide to preserve the same snapshots on different + # hosts. Otherwise, this is redundant and not needed. 
+ del-orphan-snaps + if $dry_run; then + m btrbk -c /etc/btrbk$conf_suf.conf -v -n prune + else + logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg prune + fi fi +preserve_min_this_run=all +write-config +del-orphan-snaps if $dry_run; then m btrbk -c /etc/btrbk$conf_suf.conf -v -n $cmd_arg mexit 0 +else + logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg fi -# -q and just using the syslog option seemed nice, -# but it doesn't show when a send has a parent and when it doesn't. -logq btrbk -c /etc/btrbk$conf_suf.conf $preserve_arg $verbose_arg $progress_arg $cmd_arg if $early; then exit 0 @@ -921,9 +953,26 @@ fi ## run extra commands on targets + +local_snaps=$(${local_snap_list_cmds[*]}) if [[ $ret == 0 ]]; then for tg in ${targets[@]}; do h=$(ssh $tg hostname) + remote_snaps=$(ssh root@$tg "shopt -s nullglob; ${snap_list_cmds[*]}") + # a check like this will catch the situation we aim to prevent by running purge + if [[ $local_snaps != "$remote_snaps" ]]; then + localtmp=$(mktemp) + printf "%s\n" "$local_snaps" |tr ' ' '\n' >$localtmp + remotetmp=$(mktemp) + printf "%s\n" "$remote_snaps" |tr ' ' '\n' >$remotetmp + e "error: for $tg, remote and local snaps are different." + e "local: $local_snaps" + e "tg:$tg = $remote_snaps" + e "diff -u local remote" + diff -u $localtmp $remotetmp + rm $localtmp $remotetmp + ret=1 + fi if [[ $h == kd && $HOSTNAME == x3 && $HOSTNAME == "$MAIL_HOST" ]]; then d ssh root@$tg 'btrbk-spread-wrap &>/dev/null >> received subvolume (incremental) +# -------------------------------------------------------------------------------- +# /mnt/root/btest/x +# === /mnt/root/btest/btrbk/x.20240712T142254-0400 +# --- /mnt/root/btest/btrbk/x.20240711T142115-0400 +# >>> kd.b8.nz:/mnt/root/btest/btrbk/x.20240712T142254-0400 + +# NOTE: Dryrun was active, none of the operations above were actually executed! 
+ +# ### end output + +# What we actually want is: + +# [send/receive] target: kd.b8.nz:/mnt/root/btest/btrbk/x.20240711T142115-0400 +# [send/receive] source: /mnt/root/btest/btrbk/x.20240711T142115-0400 +# [send/receive] parent: /mnt/root/btest/btrbk/x.20240711T141719-0400 + +# Note: I would expect btrbk -p to do this, but it actually works differently than preserve_min all + +# This is a good guess as to the source of my periodic clone source error, +# however, when I hit that error, and then ran btrfs with correctly ordered +# subvolumes, it did not fix the error. Deleting the parent subvolume on the +# target host did solve the error. But, it could have been due to this thing +# happening in an earlier send. I'll give it a year or so of testing. +# +# 2024-07-05 23:00:34 ___ [stderr] ERROR: clone: cannot find source subvol 3439b7f8-7130-e740-970c-9c21f5b1110b +# +# Note, the error is confusing because there exists a subvol with that +# uuid, but I'm pretty sure what it is actually saying is that it can't +# find some data it needs within that subvol when it expected to. At one +# point, I had figured out a way to see the exact file it was failing +# on, but last time I looked, I didn't see an easy way to do it. It +# might be in my post to linux-btrfs about this. diff --git a/distro-end b/distro-end index 56fc735..77ff9d8 100755 --- a/distro-end +++ b/distro-end @@ -747,7 +747,7 @@ EOF # Pin-Priority: 1001 # EOF # - # s fwupdmgr get-updates + # fwupdmgr get-updates # says I have 3 "devices with no available firmware updates" # if there were updates, install with: s fwupdmgr update @@ -756,7 +756,10 @@ EOF check_downgrade=true fi # note this installs a kernel - pi system76-firmware system76-driver fwupd + pi system76-firmware system76-driver + pi-nostart fwupd + # some other service starts it. note: this still needs testing. 
+ ser mask fwupd if $check_downgrade && [[ -e /etc/apt/preferences.d/system76-apt-preferences ]]; then # driver installs a preferences file to give s76 packages # priority so we may need to downgrade here. diff --git a/filesystem/etc/cron.d/ian b/filesystem/etc/cron.d/ian index 64e338c..fb67cf9 100644 --- a/filesystem/etc/cron.d/ian +++ b/filesystem/etc/cron.d/ian @@ -15,24 +15,24 @@ # default is /bin/sh SHELL=/bin/bash # default is /usr/bin:/bin -PATH=/sbin:/usr/sbin:/usr/bin:/bin:/usr/local/bin:/a/exe +PATH=/sbin:/usr/sbin:/usr/bin:/bin:/usr/local/bin MAILTO=root */10 * * * * root rootsshsync |& log-once -15 rootsshsync # If theres any logged errors we didnt handle in 4 days, maybe we accidentally missed them, # so report if we did -4 9 * * 5 root /a/bin/ds/check-stale-alerts -4 10 * * 5 root /a/bin/ds/check-radicale -4 15 * * 5 iank /a/bin/ds/mailclean -14 * * * * root /a/bin/ds/bk-backup |& log-once -24 bk-backup +4 9 * * 5 root check-stale-alerts +4 10 * * 5 root check-radicale +4 15 * * 5 iank mailclean +14 * * * * root bk-backup |& log-once -24 bk-backup 0 7 * * * iank failmail myupgrade-iank 20 7 * * * root myupgrade |& log-once -1 myupgrade 20 5 * * * root prof-backup |& log-once -1 prof-backup 19 * * * * root check-crypttab -4 20 * * 5 iank /usr/local/bin/check-lets-encrypt-ssl-settings -4 21 * * 5 iank /b/ds/auto-commit-changes /a /p -4 23 * * 5 iank failmail /b/ds/eggdrop-upgrade +4 20 * * 5 iank check-lets-encrypt-ssl-settings +4 21 * * 5 iank auto-commit-changes /a /p +4 23 * * 5 iank failmail eggdrop-upgrade # avoid dnssec expirations. 
This is a hack, what we should # do instead is something like, sign only if expiration is diff --git a/filesystem/etc/udev/rules.d/99-kaleidoscope.rules b/filesystem/etc/udev/rules.d/99-kaleidoscope.rules index b238912..5fa3ffc 100644 --- a/filesystem/etc/udev/rules.d/99-kaleidoscope.rules +++ b/filesystem/etc/udev/rules.d/99-kaleidoscope.rules @@ -10,8 +10,8 @@ ## - https://github.com/systemd/systemd/issues/4288 ## - https://www.freedesktop.org/software/systemd/man/sd-login.html -# iank: substituted := for =, based on -# Jun 09 12:27:48 so systemd-udevd[1385]: /etc/udev/rules.d/99-kaleidoscope.rules:18 ENV key ta +## iank: substituted := for =, based on +## Jun 09 12:27:48 so systemd-udevd[1385]: /etc/udev/rules.d/99-kaleidoscope.rules:18 ENV key ta kes '==', '!=', '=', or '+=' operator, assuming '='. SUBSYSTEMS=="usb", ATTRS{idVendor}=="1209", ATTRS{idProduct}=="2300", SYMLINK+="model01", ENV{ID_MM_DEVICE_IGNORE}="1", ENV{ID_MM_CANDIDATE}="0", TAG+="uaccess", TAG+="seat" diff --git a/filesystem/usr/local/bin/fwupd-cron b/filesystem/usr/local/bin/fwupd-cron new file mode 100755 index 0000000..cb4d652 --- /dev/null +++ b/filesystem/usr/local/bin/fwupd-cron @@ -0,0 +1,21 @@ +#!/bin/bash +# I, Ian Kelling, follow the GNU license recommendations at +# https://www.gnu.org/licenses/license-recommendations.en.html. They +# recommend that small programs, < 300 lines, be licensed under the +# Apache License 2.0. This file contains or is part of one or more small +# programs. If a small program grows beyond 300 lines, I plan to change +# to a recommended GPL license. + +# Copyright 2024 Ian Kelling + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/mail-setup b/mail-setup index dcda9e9..d6aedb2 100755 --- a/mail-setup +++ b/mail-setup @@ -620,7 +620,7 @@ fi u /etc/systemd/system/mailclean.timer <<'EOF' [Unit] -Description=Run mailclean daily +Description=Run mailclean [Timer] OnCalendar=monthly @@ -637,7 +637,7 @@ After=multi-user.target [Service] User=$u Type=oneshot -ExecStart=/usr/local/bin/sysd-mail-once mailclean /a/bin/distro-setup/mailclean +ExecStart=/usr/local/bin/sysd-mail-once mailclean /usr/local/bin/mailclean EOF # * postgrey diff --git a/rshiank b/rshiank new file mode 100755 index 0000000..dd0fea4 --- /dev/null +++ b/rshiank @@ -0,0 +1,45 @@ +#!/bin/bash +set -eE -o pipefail +trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR + +# restricted ssh does not allow arguments, but they exist in $SSH_ORIGINAL_COMMAND +# debug +dfile=/tmp/rshiank +date >>$dfile +echo SSH_ORIGINAL_COMMAND: $SSH_ORIGINAL_COMMAND >>$dfile + +if [[ ! $SSH_ORIGINAL_COMMAND ]]; then + echo "no SSH_ORIGINAL_COMMAND" >&2 + exit 1 +fi + +mapfile -t cmds <<'EOF' +# allow rsyncing into my ~/tmp +rsync --server -re.iLsfxCIvu --log-format=X --partial . /home/iank/tmp +EOF +mapfile -t regex_cmds <<'EOF' +EOF + +allow=false +for c in "${regex_cmds[@]}"; do + if [[ $c == \#* ]]; then continue; fi + if [[ $SSH_ORIGINAL_COMMAND =~ $c ]]; then + allow=true + break + fi +done +if ! 
$allow; then + for c in "${cmds[@]}"; do + # echo "c $c" # debug + if [[ $c == \#* ]]; then continue; fi + if [[ $SSH_ORIGINAL_COMMAND == "$c" ]]; then + allow=true + break + fi + done +fi +if $allow; then + eval $SSH_ORIGINAL_COMMAND || exit $? +else + echo "rshiank: failed command: $SSH_ORIGINAL_COMMAND" | tee -a $dfile +fi diff --git a/script-files b/script-files index 4e8c413..79a9098 100644 --- a/script-files +++ b/script-files @@ -62,6 +62,12 @@ my_bin_files=( copyq-restart toggle-mute + + check-stale-alerts + check-radicale + mailclean + bk-backup + eggdrop-upgrade ) for f in /b/log-quiet/*; do diff --git a/switch-mail-host b/switch-mail-host index 053134a..d129b62 100644 --- a/switch-mail-host +++ b/switch-mail-host @@ -58,11 +58,11 @@ restore_old_btrbk=false err-cleanup() { if $restore_new_btrbk; then e WARNING: due to failure, btrbk.timer may need manual restoration: - e $new_shell systemctl start btrbk.timer + e $new_shell systemctl enable --now btrbk.timer fi if $restore_old_btrbk; then e WARNING: due to failure, btrbk.timer may need manual restoration: - e $old_shell systemctl start btrbk.timer + e $old_shell systemctl enable --now btrbk.timer fi } @@ -260,11 +260,11 @@ fi ########### end initial processing, begin actually modifying things ########## if $new_shell systemctl is-active btrbk.timer; then - m $new_shell systemctl stop btrbk.timer + m $new_shell systemctl disable --now btrbk.timer restore_new_btrbk=true fi if $old_shell systemctl is-active btrbk.timer; then - m $old_shell systemctl stop btrbk.timer + m $old_shell systemctl disable --now btrbk.timer restore_old_btrbk=true fi