# See the License for the specific language governing permissions and
# limitations under the License.
+
+# Requires node_exporter 1.4.0 or later to run with multiple units, due to a bug in earlier versions:
+# https://github.com/prometheus/node_exporter/pull/2475
+
set -eE -o pipefail
trap 'echo "$0:$LINENO:error: \"$BASH_COMMAND\" returned $?" >&2' ERR
exit 0
fi
-# we have to merge files due to this:
-# https://github.com/prometheus/node_exporter/issues/1885
-# or else we could put the label in the metric name, but that
-# is a bad practice.
-# Note, i found this https://github.com/hansmi/prometheus-textformat-merge
-# but it seems overkill.
-f=$dir/${ser_name}-result-fail.premerge
+f=$dir/${ser_name}-result.prom
ftmp=$f.$$
if $write_count; then
printf 'node_systemd_unit_result_fail_count{name="%s"} %s\n' "$ser_name" "$count" >$ftmp
mv $ftmp $f
- finaltmp=$dir/sysd-result-fail.prom.$$
- # i had a minor problem that multiple of these scripts executed at the same time, and ended up with
- # multiple instances of the same file
- # ls -lai
- # ...
- # 3896242 -rw-r--r-- 1 root root 439 Apr 4 00:02 sysd-result-fail.prom
- # 3896242 -rw-r--r-- 1 root root 439 Apr 4 00:02 sysd-result-fail.prom
- # 3896242 -rw-r--r-- 1 root root 439 Apr 4 00:02 sysd-result-fail.prom
- # this seems harmless, https://www.spinics.net/lists/linux-btrfs/msg111245.html
- # however, lets do a bit to avoid it: sleep a random amount of time from 0-.5 seconds.
- sleep 0.$(( RANDOM % 500 ))
- cat $dir/*-result-fail.premerge >$finaltmp
- mv $finaltmp $dir/sysd-result-fail.prom
fi
mkdir -p /var/lib/prometheus/node-exporter
-f=/var/local/sysd-prom-fail/${ser_name}-result-fail.prom
+f=/var/local/sysd-prom-fail/${ser_name}-result.prom
-if [[ ! -s /var/lib/prometheus/node-exporter/${ser_name}-result-fail.premerge ]]; then
+if [[ ! -s $f ]]; then
SERVICE_RESULT=success /usr/local/bin/sysd-prom-fail $ser_name
fi
systemctl restart $ser_name ||:
fi
fi
+
+# TODO: write an uninstall script.