[defaults]
-forks = 200
+# 2.7.4-1ppa~xenial would use 194M of memory resident in htop
+# and run out of 4g of memory. Just ran ansible 2.9.9-1ppa~bionic+9.0trisquel1
+# with a 23 fork limit, and it topped out at 1 gig of memory used.
+# We have about 60 hosts, so 100 should allow them all to run in
+# parallel without memory problems.
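+# The one-liner below was used to watch peak system memory during a run
+# (it prints a new line whenever total used memory hits a new max):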
+# oldused=0; while true; do used=$(free -w -t | tail -n1 | awk '{print $3}'); if ((used > oldused )); then oldused=$used; echo $(date) $used | tee used; fi; sleep 1; done
+forks = 100
+
+# Ansible doesn't have Trisquel's python path in its os info.
+# Silence the warning.
+# https://docs.ansible.com/ansible/2.9/reference_appendices/interpreter_discovery.html
+interpreter_python = auto_silent
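+# (setting ansible_python_interpreter per host in inventory would also
+# work and bypasses discovery; auto_silent keeps discovery but drops the
+# warning)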
+# strategy = free # DO NOT ENABLE.
+#
+# As of 2019-08-07, include_tasks is very broken with the free strategy:
+# tasks will not be run for some hosts, "when" rules get ignored so tasks
+# run on the wrong host, or a task runs twice for some hosts. Even if we
+# switch to import_tasks, I wouldn't trust using this until that bug is
+# found and fixed. Repro: tested with 2.7.4 (no bug reports or fixes
+# found) by running just the common role, then searching the log for
+# which hosts an install.yml-included role actually ran on, using:
+#
+# f() { awk '/xfsprogs/ { x = 1; next }; /^TASK/ { x = 0 }; x && /\[/ { print }' $1 | sort | uniq -c | pee cat wc; }
+# f LOGFILE
host_key_checking = False
display_skipped_hosts = False
retry_files_enabled = False
# readable output
stdout_callback = yaml
-callback_whitelist = timer, profile_tasks
+# Our logs are already pretty big. You can temporarily uncomment this to
+# enable profiling info in the logs.
+#callback_whitelist = timer, profile_tasks, profile_roles
+
+# Ansible suggests using the file module instead of chmod, but then it
+# follows symlinks without an option to turn it off, which is completely
+# braindead and screwed up my system.
+command_warnings = False
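+# e.g. the kind of task that warning fires on (the path below is just an
+# illustration, not from this repo):
+#   - name: chmod directly rather than via the file module
+#     command: chmod 644 /etc/example.conf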
[ssh_connection]
pipelining = True
---
-- hosts: tp.b8.nz
+- hosts: kd.b8.nz
roles:
- role: prom
+ tags: a
+ prometheus_targets:
+ node:
+ - targets:
+ - "{{ ansible_fqdn }}:9100"
prometheus_scrape_configs:
- job_name: "prometheus"
metrics_path: "{{ prometheus_metrics_path }}"
static_configs:
- targets:
- - "{{ ansible_fqdn | default(ansible_host) | default('localhost') }}:9090"
+ - "{{ ansible_fqdn }}:9090"
- job_name: "node"
- basic_auth:
- username: prom
- password_file: /etc/prometheus-pass
- scheme: "https"
+ # basic_auth:
+ # username: prom
+ # password_file: /etc/prometheus-pass
+ #scheme: "https"
file_sd_configs:
- files:
- "{{ prometheus_config_dir }}/file_sd/node.yml"
+ # added because of warning in log
+ prometheus_alertmanager_config:
+ - static_configs:
+ - targets:
+ - "{{ ansible_fqdn }}:9093"
- prometheus_targets:
- node:
- - targets: "{{ groups.all|map('regex_replace','$',':9101')|list }}"
- role: node-exporter
- node_exporter_web_listen_address: "127.0.0.1:9100"
+ node_exporter_web_listen_address: "127.0.1.1:9100"
+
- role: alertmanager
alertmanager_smtp:
- smarthost: 'localhost:25'
- from: "alertmanager@{{ ansible_fqdn | default(ansible_host) | default('localhost') }}"
+ smarthost: 'mail.iankelling.org:587'
+ from: "alerts@iankelling.org"
require_tls: false
+ hello: 'defaultnn.b8.nz'
alertmanager_route:
receiver: defaultreceiver
alertmanager_receivers:
- name: defaultreceiver
email_configs:
- to: alerts@iankelling.org
- html: "{% raw -%}{{ template \"email.default.html\" . }}{% endraw -%}"
- text: "{% raw -%}{{ template \"email.default.text\" . }}{% endraw -%}"
+ send_resolved: true
+              # The html was a bit ugly and just a huge waste of text;
+              # https://github.com/prometheus/alertmanager/issues/2232
+              # led me to find a convenient text option to use.
+ html:
+ text: '{% raw -%}{{ template "opsgenie.default.description" . }}{% endraw -%}'
+ alertmanager_web_listen_address: '127.0.1.1:9093'
- alertmanager_listen_address: '127.0.0.1:9093'
- role: grafana
- grafana_address: "127.0.0.1"
+ grafana_address: "127.0.1.1"
      # iank: the playbook will halt if no password is set. This is only
      # available to localhost, so I don't really care, but might as well
      # generate a pass instead of putting in pw123 etc.
      # https://prometheus.io/docs/visualization/grafana/
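      # A sketch of one way to do that, assuming the grafana role here is
      # cloudalchemy's (grafana_security / admin_password are that role's
      # variable names):
      # grafana_security:
      #   admin_password: "{{ lookup('password', '/dev/null length=20 chars=ascii_letters,digits') }}"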
grafana_dashboards:
- dashboard_id: 1860
- revision_id: 13
+ revision_id: 21
datasource: prometheus
- dashboard_id: 405
revision_id: 8