+ ### end removing panic lines due to service restarts ###
+
+
+ ## begin broken pipe & write lock & general alert ##
+ # Messages matched by $regex get special handling: count them and note
+ # whether any is newer than 5 minutes. The pattern uses the GNU basic
+ # regex alternation \| and is passed to both grep and sed below.
+ regex="Failed to get write lock\|Failed writing transport results to pipe: Broken pipe$"
+ newlines=false
+ count=0
+ while read -r day time _; do
+   count=$((count+1))
+   # If the timestamp cannot be parsed, treat the line as old (epoch 0)
+   # rather than letting the failed substitution abort the script when
+   # errexit is in effect, which would skip the metric update further down.
+   log_s=$(date -d "$day $time" +%s) || log_s=0
+   if (( log_s > EPOCHSECONDS - 300 )); then
+     newlines=true
+   fi
+ done < <(grep -- "$regex" "$pl" ||:)
+ if (( count )); then
+   # I see broken pipe in groups of 3 for the same message around once a day
+   # randomly. I'm guessing they are related to running 2 instances of
+   # exim which share the same spool. So, if we have some, but not in
+   # the last 5 minutes, and no more than 20, it should be fine to clear
+   # them. write lock happens less but can fit under the same rule.
+   if (( count > 20 )); then
+     pr_metric=1
+   elif ! $newlines; then
+     # Keep a copy in the archive (and on stdout) before deleting the
+     # matching lines from the panic log in place.
+     grep -- "$regex" "$pl" |& tee -a "$pl-archive"
+     v "above is from grep $regex"
+     sed -i "/$regex/d" "$pl"
+   fi
+ fi
+
+ # Alert on any other panic log line (one not matching $regex) that is
+ # older than 2 minutes. I think we could alert on anything older than
+ # 61 seconds, but lets just add some slack.
+ # Lines matching $regex are excluded here; pr_metric for them is
+ # handled above.
+ while read -r day time _; do
+   # some lines dont have dates, just skip them
+   # 2022-09-16 15:21:06.250 [438097] Exim configuration error:
+   # can't redefine an undefined macro "REMOTE_SMTP_SMARTHOST_TLS_VERIFY_HOSTS"
+   if [[ $day != [2-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] ]]; then
+     continue
+   fi
+   # A timestamp that date cannot parse counts as old (epoch 0), so the
+   # line raises the alert instead of aborting the script when errexit
+   # is in effect.
+   log_s=$(date -d "$day $time" +%s) || log_s=0
+   if (( EPOCHSECONDS - 120 > log_s )); then
+     pr_metric=1
+     # One old line is enough; no need to run date for the rest.
+     break
+   fi
+ done < <(grep -v -- "$regex" "$pl" ||:)
+ ## end broken pipe ##
+
+ # Publish the result as a .prom file under the node-exporter directory.
+ # Write to a temporary name in the same directory and rename it into
+ # place, so a reader never sees a truncated or half-written file. The
+ # temporary name does not end in .prom.
+ paniclog_prom=/var/lib/prometheus/node-exporter/exim_paniclog.prom
+ echo "exim_paniclog $pr_metric" >"$paniclog_prom.$$"
+ mv -- "$paniclog_prom.$$" "$paniclog_prom"
+