From b1a72a8ecf49c2bec0036152a1c9b1b4d49736df Mon Sep 17 00:00:00 2001 From: s3lph Date: Sat, 6 Nov 2021 05:22:17 +0100 Subject: [PATCH] Full doover of the prometheus role --- roles/prometheus/defaults/main.yml | 7 +- roles/prometheus/handlers/main.yml | 10 +- roles/prometheus/tasks/blackbox.yml | 11 +- roles/prometheus/tasks/flush.yml | 25 +-- roles/prometheus/tasks/install.yml | 67 +++----- roles/prometheus/tasks/main.yml | 12 +- roles/prometheus/tasks/prometheus.yml | 145 +++--------------- .../templates/etc-defaults-blackbox | 12 -- .../default/prometheus-blackbox-exporter.j2 | 34 ++++ .../templates/etc/default/prometheus.j2 | 114 ++++++++++++++ .../etc/prometheus/alert_rules.yml.j2 | 18 +++ .../prometheus/blackbox.yml.j2} | 0 .../etc/prometheus/prometheus.yml.j2 | 80 ++++++++++ .../override.conf.j2 | 5 + .../templates/prometheus-alert-base.yml | 6 - .../prometheus/templates/prometheus-base.yml | 25 --- ...prometheus-blackbox-exporter-local.service | 15 -- roles/prometheus/templates/prometheus-job.yml | 52 ------- roles/prometheus_node/tasks/install.yml | 2 +- 19 files changed, 330 insertions(+), 310 deletions(-) delete mode 100644 roles/prometheus/templates/etc-defaults-blackbox create mode 100644 roles/prometheus/templates/etc/default/prometheus-blackbox-exporter.j2 create mode 100644 roles/prometheus/templates/etc/default/prometheus.j2 create mode 100644 roles/prometheus/templates/etc/prometheus/alert_rules.yml.j2 rename roles/prometheus/templates/{blackbox.yml => etc/prometheus/blackbox.yml.j2} (100%) create mode 100644 roles/prometheus/templates/etc/prometheus/prometheus.yml.j2 create mode 100644 roles/prometheus/templates/etc/systemd/system/prometheus-blackbox-exporter.service.d/override.conf.j2 delete mode 100644 roles/prometheus/templates/prometheus-alert-base.yml delete mode 100644 roles/prometheus/templates/prometheus-base.yml delete mode 100644 roles/prometheus/templates/prometheus-blackbox-exporter-local.service delete mode 100644 
roles/prometheus/templates/prometheus-job.yml diff --git a/roles/prometheus/defaults/main.yml b/roles/prometheus/defaults/main.yml index cb33d4c..1e6b451 100644 --- a/roles/prometheus/defaults/main.yml +++ b/roles/prometheus/defaults/main.yml @@ -1,7 +1,6 @@ --- blackbox_prefer_ipv4: no -prometheus_blackbox_exporter_version: 0.16.0 prometheus_port: 9090 prometheus_alertmanager_port: 9093 @@ -10,3 +9,9 @@ prometheus_node_exporter_port: 9100 prometheus_scrape_interval: 1m prometheus_evaluation_interval: 1m prometheus_lookback_delta: 5m +prometheus_additional_arguments: "" +prometheus_blackbox_exporter_additional_arguments: "" + +prometheus_hostgroup_alertmanagers: alertmanagers + +prometheus_job_default_group_name: default diff --git a/roles/prometheus/handlers/main.yml b/roles/prometheus/handlers/main.yml index 37d9315..0ba85f9 100644 --- a/roles/prometheus/handlers/main.yml +++ b/roles/prometheus/handlers/main.yml @@ -1,11 +1,15 @@ --- -- name: restart prometheus +- name: systemctl daemon-reload ansible.builtin.systemd: + daemon_reload: yes + +- name: restart prometheus + ansible.builtin.service: name: prometheus.service state: restarted - name: restart blackbox exporter - ansible.builtin.systemd: - name: prometheus-blackbox-exporter-local.service + ansible.builtin.service: + name: prometheus-blackbox-exporter state: restarted diff --git a/roles/prometheus/tasks/blackbox.yml b/roles/prometheus/tasks/blackbox.yml index da71d46..5002e5b 100644 --- a/roles/prometheus/tasks/blackbox.yml +++ b/roles/prometheus/tasks/blackbox.yml @@ -11,7 +11,7 @@ - name: render blackbox exporter configuration ansible.builtin.template: - src: blackbox.yml + src: etc/prometheus/blackbox.yml.j2 dest: /etc/prometheus/blackbox.yml owner: root group: root @@ -19,3 +19,12 @@ vars: blackbox_modules: "{{ _blackbox_modules }}" notify: restart blackbox exporter + +- name: render /etc/default/prometheus-blackbox-exporter + ansible.builtin.template: + src: 
etc/default/prometheus-blackbox-exporter.j2 + dest: /etc/default/prometheus-blackbox-exporter + owner: root + group: root + mode: 0644 + notify: restart blackbox exporter diff --git a/roles/prometheus/tasks/flush.yml b/roles/prometheus/tasks/flush.yml index 1ebfbe8..4669c90 100644 --- a/roles/prometheus/tasks/flush.yml +++ b/roles/prometheus/tasks/flush.yml @@ -2,11 +2,14 @@ - name: enable prometheus admin api - ansible.builtin.lineinfile: - path: /etc/default/prometheus - regexp: "^ARGS=.*$" - line: >- - ARGS="--query.lookback-delta={{ prometheus_lookback_delta }} --web.enable-admin-api --web.listen-address=127.0.0.1:9090" - insertbefore: BOF + ansible.builtin.template: + src: etc/default/prometheus.j2 + dest: /etc/default/prometheus + owner: root + group: root + mode: 0644 + vars: + prometheus_additional_arguments: + "{{ hostvars[inventory_hostname].prometheus_additional_arguments | default('') }} --web.enable-admin-api --web.listen-address=127.0.0.1:9090" notify: restart prometheus - name: flush handlers @@ -29,12 +32,12 @@ seconds: 5 - name: disable prometheus admin api - ansible.builtin.lineinfile: - path: /etc/default/prometheus - regexp: "^ARGS=.*$" - line: >- - ARGS="--query.lookback-delta={{ prometheus_lookback_delta }}" - insertbefore: BOF + ansible.builtin.template: + src: etc/default/prometheus.j2 + dest: /etc/default/prometheus + owner: root + group: root + mode: 0644 notify: restart prometheus - name: flush handlers diff --git a/roles/prometheus/tasks/install.yml b/roles/prometheus/tasks/install.yml index 60126c5..c4fceb2 100644 --- a/roles/prometheus/tasks/install.yml +++ b/roles/prometheus/tasks/install.yml @@ -2,60 +2,37 @@ - name: install prometheus ansible.builtin.apt: - name: prometheus - state: present - update_cache: yes + name: + - prometheus + - prometheus-blackbox-exporter + +- name: create systemd drop-in directory for blackbox exporter + ansible.builtin.file: + path: /etc/systemd/system/prometheus-blackbox-exporter.service.d + state: directory + owner: root + group: root + mode: 0755 + +- name: enable cap_net_raw for blackbox exporter + ansible.builtin.template: + src: etc/systemd/system/prometheus-blackbox-exporter.service.d/override.conf.j2 + dest: /etc/systemd/system/prometheus-blackbox-exporter.service.d/override.conf 
+ owner: root + group: root + mode: 0644 + notify: + - systemctl daemon-reload + - restart blackbox exporter - name: start and enable prometheus - ansible.builtin.systemd: - name: prometheus.service + ansible.builtin.service: + name: prometheus state: started enabled: yes -- name: download blackbox exporter - ansible.builtin.get_url: - url: "https://github.com/prometheus/blackbox_exporter/releases/download/v{{ prometheus_blackbox_exporter_version }}/blackbox_exporter-{{ prometheus_blackbox_exporter_version }}.linux-amd64.tar.gz" # noqa 204 - dest: "/root/blackbox_exporter-{{ prometheus_blackbox_exporter_version }}.tar.gz" - -- name: unpack blackbox exporter - ansible.builtin.unarchive: - src: "/root/blackbox_exporter-{{ prometheus_blackbox_exporter_version }}.tar.gz" - dest: "/root" - remote_src: yes - -- name: install blackbox exporter - ansible.builtin.copy: - src: "/root/blackbox_exporter-{{ prometheus_blackbox_exporter_version }}.linux-amd64/blackbox_exporter" - dest: /usr/local/bin/prometheus-blackbox-exporter - remote_src: yes - owner: root - group: root - mode: 0755 - -- name: install blackbox exporter defaults file - ansible.builtin.template: - src: etc-defaults-blackbox - dest: /etc/default/prometheus-blackbox-exporter - owner: root - group: root - mode: 0644 - -- name: install blackbox exporter service - ansible.builtin.template: - src: prometheus-blackbox-exporter-local.service - dest: /etc/systemd/system/prometheus-blackbox-exporter-local.service - owner: root - group: root - mode: 0644 - -- name: stop and diable native blackbox exporter - ansible.builtin.systemd: - name: prometheus-blackbox-exporter.service - state: stopped - enabled: no - - name: start and enable blackbox exporter - ansible.builtin.systemd: - name: prometheus-blackbox-exporter-local.service + ansible.builtin.service: + name: prometheus-blackbox-exporter state: started enabled: yes diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml index 9ec1bfc..c9ca2f4 
100644 --- a/roles/prometheus/tasks/main.yml +++ b/roles/prometheus/tasks/main.yml @@ -18,10 +18,8 @@ - "role::prometheus" - "role::prometheus:config" -# Some problems with "never" tag -#- name: flush prometheus time series database -# import_tasks: flush.yml -# tags: -# - "role::prometheus" -# - "role::prometheus:flush" -# - "never" +- name: flush prometheus time series database + import_tasks: flush.yml + tags: + - "role::prometheus:flush" + - "never" diff --git a/roles/prometheus/tasks/prometheus.yml b/roles/prometheus/tasks/prometheus.yml index db07962..4980bf9 100644 --- a/roles/prometheus/tasks/prometheus.yml +++ b/roles/prometheus/tasks/prometheus.yml @@ -1,144 +1,35 @@ --- - -- name: create config fragment directory - delegate_to: localhost - run_once: yes - ansible.builtin.file: - path: "{{ playbook_dir }}/.prometheus/{{ item }}.d" - state: directory - mode: 0755 - loop: - - conf - - alerts - -- name: list current config fragments - delegate_to: localhost - ansible.builtin.find: - paths: - - "{{ playbook_dir }}/.prometheus/conf.d/" - - "{{ playbook_dir }}/.prometheus/alerts.d/" - recurse: yes - patterns: "*.yml" - excludes: "00-base.yml" - register: prometheus_register_current_fragments - -- name: process current config fragments - ansible.builtin.set_fact: - prometheus_register_current_fragments: - "{{ prometheus_register_current_fragments.files | map(attribute='path') | list }}" - -- name: render prometheus base config - delegate_to: localhost - run_once: yes + +- name: render /etc/prometheus/prometheus.yml ansible.builtin.template: - src: prometheus-base.yml - dest: "{{ playbook_dir }}/.prometheus/conf.d/00-base.yml" - mode: 0644 - -- name: render prometheus job configs - delegate_to: localhost - run_once: yes - ansible.builtin.template: - src: prometheus-job.yml - dest: "{{ playbook_dir }}/.prometheus/conf.d/{{ '%02d' | format(counter+1) }}-job-{{ item.name }}.yml" - mode: 0644 - vars: - job: "{{ item }}" - loop: "{{ hostvars[inventory_hostname] | 
dict2items | selectattr('key', 'match', '^prometheus_job_.+$') | map(attribute='value') | list }}" - loop_control: - index_var: counter - register: prometheus_register_new_config_fragments - -- name: remove newly created files from deletion list - ansible.builtin.set_fact: - prometheus_register_current_fragments: - "{{ prometheus_register_current_fragments | difference(prometheus_register_new_config_fragments.results | map(attribute='dest') | list) }}" - -- name: render prometheus alert base config - delegate_to: localhost - run_once: yes - ansible.builtin.template: - src: prometheus-alert-base.yml - dest: .prometheus/alerts.d/00-base.yml - mode: 0644 - -- name: render prometheus alert configs - delegate_to: localhost - run_once: yes - ansible.builtin.copy: - content: "{{ item.alerts | to_nice_yaml(indent=2) | indent(2, first=true) }}" - dest: "{{ playbook_dir }}/.prometheus/alerts.d/{{ '%02d' | format(counter+1) }}-alert-{{ item.name }}.yml" - mode: 0644 - loop: "{{ hostvars[inventory_hostname] | dict2items | selectattr('key', 'match', '^prometheus_job_.+$') | map(attribute='value') | list }}" - loop_control: - index_var: counter - register: prometheus_register_new_alert_fragments - -- name: remove newly created files from deletion list - ansible.builtin.set_fact: - prometheus_register_current_fragments: - "{{ prometheus_register_current_fragments | difference(prometheus_register_new_alert_fragments.results | map(attribute='dest') | list) }}" - -- name: render host-specific prometheus alert configs - delegate_to: localhost - ansible.builtin.copy: - content: "{{ hostvars[item].prometheus_host_specific_alerts | to_nice_yaml(indent=2) | indent(2, first=true) }}" - dest: "{{ playbook_dir }}/.prometheus/alerts.d/{{ '99-host-%s' | format(hostvars[item].inventory_hostname) }}-alerts.yml" - mode: 0644 - when: "'prometheus_host_specific_alerts' in hostvars[item]" - loop: "{{ hostvars.keys() | list }}" - register: prometheus_register_new_host_specific_alert_fragments - -- 
name: remove newly created files from deletion list - ansible.builtin.set_fact: - prometheus_register_current_fragments: - "{{ prometheus_register_current_fragments | difference(prometheus_register_new_host_specific_alert_fragments.results | selectattr('dest', 'defined') | map(attribute='dest') | list) }}" # noqa 204 - -- name: delete old config fragments - delegate_to: localhost - ansible.builtin.file: - path: "{{ item }}" - state: absent - loop: "{{ prometheus_register_current_fragments }}" - -- name: assemble fragment directories - delegate_to: localhost - ansible.builtin.assemble: - src: "{{ playbook_dir }}/.prometheus/{{ item.local }}.d/" - dest: "{{ playbook_dir }}/.prometheus/{{ item.remote }}.yml" - delimiter: "\n\n" - mode: 0644 - loop: - - local: conf - remote: prometheus - - local: alerts - remote: alert_rules - -- name: upload config files to host - ansible.builtin.copy: - src: "{{ playbook_dir }}/.prometheus/prometheus.yml" - dest: "/etc/prometheus/prometheus.yml" + src: etc/prometheus/prometheus.yml.j2 + dest: /etc/prometheus/prometheus.yml owner: root group: root mode: 0644 validate: /usr/bin/promtool check config %s + vars: + jobs: "{{ hostvars[inventory_hostname] | dict2items | selectattr('key', 'match', '^prometheus_job_.+$') | map(attribute='value') | list }}" notify: restart prometheus -- name: upload alert config file to host - ansible.builtin.copy: - src: "{{ playbook_dir }}/.prometheus/alert_rules.yml" - dest: "/etc/prometheus/alert_rules.yml" +- name: render /etc/prometheus/alert_rules.yml + ansible.builtin.template: + src: etc/prometheus/alert_rules.yml.j2 + dest: /etc/prometheus/alert_rules.yml owner: root group: root mode: 0644 validate: /usr/bin/promtool check rules %s + vars: + alert_groups: "{{ hostvars[inventory_hostname] | dict2items | selectattr('key', 'match', '^prometheus_job_.+$') | map(attribute='value') | map(attribute='group') | map('default', prometheus_job_default_group_name) | unique | list }}" + jobs: "{{ hostvars[inventory_hostname] | dict2items | selectattr('key', 
'match', '^prometheus_job_.+$') | map(attribute='value') | list }}" notify: restart prometheus -- name: configure prometheus lookback delta - ansible.builtin.lineinfile: - path: /etc/default/prometheus - regexp: "^ARGS=.*$" - line: >- - ARGS="--query.lookback-delta={{ prometheus_lookback_delta }}" - insertbefore: BOF +- name: render /etc/default/prometheus + ansible.builtin.template: + src: etc/default/prometheus.j2 + dest: /etc/default/prometheus + owner: root + group: root + mode: 0644 notify: restart prometheus diff --git a/roles/prometheus/templates/etc-defaults-blackbox b/roles/prometheus/templates/etc-defaults-blackbox deleted file mode 100644 index adc2c59..0000000 --- a/roles/prometheus/templates/etc-defaults-blackbox +++ /dev/null @@ -1,12 +0,0 @@ -{{ ansible_managed | comment }} - -ARGS='--config.file /etc/prometheus/blackbox.yml' - -# Usage of prometheus-blackbox-exporter: -# --config.file="blackbox.yml" -# Blackbox exporter configuration file. -# --web.listen-address=":9115" -# The address to listen on for HTTP requests. -# --timeout-offset=0.5 Offset to subtract from timeout in seconds. -# --log.level=info Only log messages with the given severity or above. -# One of: [debug, info, warn, error] diff --git a/roles/prometheus/templates/etc/default/prometheus-blackbox-exporter.j2 b/roles/prometheus/templates/etc/default/prometheus-blackbox-exporter.j2 new file mode 100644 index 0000000..8547354 --- /dev/null +++ b/roles/prometheus/templates/etc/default/prometheus-blackbox-exporter.j2 @@ -0,0 +1,34 @@ +{{ ansible_managed | comment }} + +# On sysvinit systems, when changing the configuration pathname also +# change it in the init script. +ARGS="--config.file /etc/prometheus/blackbox.yml {{ prometheus_blackbox_exporter_additional_arguments }}" + +# Usage of prometheus-blackbox-exporter: +# --config.file="blackbox.yml" +# Blackbox exporter configuration file. +# --web.listen-address=":9115" +# The address to listen on for HTTP requests. 
+# --timeout-offset=0.5 +# Offset to subtract from timeout in seconds. +# --history.limit=100 +# The maximum amount of items to keep in the +# history. +# --web.external-url= +# The URL under which Blackbox exporter is +# externally reachable (for example, if Blackbox +# exporter is served via a reverse proxy). Used +# for generating relative and absolute links back +# to Blackbox exporter itself. If the URL has a +# path portion, it will be used to prefix all +# HTTP endpoints served by Blackbox exporter. If +# omitted, relevant URL components will be +# derived automatically. +# --web.route-prefix= +# Prefix for the internal routes of web endpoints. +# Defaults to path of --web.external-url. +# --log.level=info +# Only log messages with the given severity or above. +# One of: [debug, info, warn, error] +# --log.format=logfmt +# Output format of log messages. One of: [logfmt, json] diff --git a/roles/prometheus/templates/etc/default/prometheus.j2 b/roles/prometheus/templates/etc/default/prometheus.j2 new file mode 100644 index 0000000..1fd3f12 --- /dev/null +++ b/roles/prometheus/templates/etc/default/prometheus.j2 @@ -0,0 +1,114 @@ +{{ ansible_managed | comment }} + +# Set the command-line arguments to pass to the server. +ARGS="--query.lookback-delta={{ prometheus_lookback_delta }} {{ prometheus_additional_arguments }}" + +# Prometheus supports the following options: +# --config.file="/etc/prometheus/prometheus.yml" +# Prometheus configuration file path. +# --web.listen-address="0.0.0.0:9090" +# Address to listen on for UI, API, and telemetry. +# --web.read-timeout=5m Maximum duration before timing out read of the +# request, and closing idle connections. +# --web.max-connections=512 Maximum number of simultaneous connections. +# --web.external-url= The URL under which Prometheus is externally +# reachable (for example, if Prometheus is served +# via a reverse proxy). Used for generating +# relative and absolute links back to Prometheus +# itself. 
If the URL has a path portion, it will +# be used to prefix all HTTP endpoints served by +# Prometheus. If omitted, relevant URL components +# will be derived automatically. +# --web.route-prefix= Prefix for the internal routes of web endpoints. +# Defaults to path of --web.external-url. +# --web.local-assets="/usr/share/prometheus/web/" +# Path to static asset/templates directory. +# --web.user-assets= Path to user asset directory, available at +# /user. +# --web.enable-lifecycle Enable shutdown and reload via HTTP request. +# --web.enable-admin-api Enable API endpoints for admin control actions. +# --web.console.templates="/etc/prometheus/consoles" +# Path to the console template directory, +# available at /consoles. +# --web.console.libraries="/etc/prometheus/console_libraries" +# Path to the console library directory. +# --web.page-title="Prometheus Time Series Collection and Processing Server" +# Document title of Prometheus instance. +# --web.cors.origin=".*" Regex for CORS origin. It is fully anchored. +# Example: 'https?://(domain1|domain2)\.com' +# --storage.tsdb.path="/var/lib/prometheus/metrics2/" +# Base path for metrics storage. +# --storage.tsdb.retention=15d +# [DEPRECATED] How long to retain samples in +# storage. This flag has been deprecated, use +# "storage.tsdb.retention.time" instead +# --storage.tsdb.retention.time=15d +# How long to retain samples in storage. When this +# flag is set it overrides +# "storage.tsdb.retention". +# If neither this flag nor "storage.tsdb.retention" +# nor "storage.tsdb.retention.size" is set, the +# retention time defaults to 15d. +# Units Supported: y, w, d, h, m, s, ms. +# --storage.tsdb.retention.size= +# [EXPERIMENTAL] Maximum number of bytes that can +# be stored for blocks. Units supported: KB, MB, +# GB, TB, PB. This flag is experimental and can be +# changed in future releases. +# --storage.tsdb.use-lockfile +# Create a lockfile in data directory. 
+# --storage.tsdb.allow-overlapping-blocks +# [EXPERIMENTAL] Allow overlapping blocks, which +# in turn enables vertical compaction and +# vertical query merge. +# --storage.tsdb.wal-compression +# Compress the tsdb WAL. +# --storage.remote.flush-deadline= +# How long to wait flushing sample on shutdown or +# config reload. +# --storage.remote.read-sample-limit=5e7 +# Maximum overall number of samples to return via +# the remote read interface, in a single query. 0 +# means no limit. This limit is ignored for +# streamed response types. +# --storage.remote.read-concurrent-limit=10 +# Maximum number of concurrent remote read calls. +# 0 means no limit. +# --storage.remote.read-max-bytes-in-frame=1048576 +# Maximum number of bytes in a single frame for +# streaming remote read response types before +# marshalling. Note that client might have limit on +# frame size as well. 1MB as recommended by +# protobuf by default. +# --rules.alert.for-outage-tolerance=1h +# Max time to tolerate prometheus outage for +# restoring "for" state of alert. +# --rules.alert.for-grace-period=10m +# Minimum duration between alert and restored "for" +# state. This is maintained only for alerts with +# configured "for" time greater than grace period. +# --rules.alert.resend-delay=1m +# Minimum amount of time to wait before resending +# an alert to Alertmanager. +# --alertmanager.notification-queue-capacity=10000 +# The capacity of the queue for pending +# Alertmanager notifications. +# --alertmanager.timeout=10s +# Timeout for sending alerts to Alertmanager. +# --query.lookback-delta=5m The maximum lookback duration for retrieving +# metrics during expression evaluations and +# federation. +# --query.timeout=2m Maximum time a query may take before being +# aborted. +# --query.max-concurrency=20 +# Maximum number of queries executed concurrently. +# --query.max-samples=50000000 +# Maximum number of samples a single query can load +# into memory. 
Note that queries will fail if they +# try to load more samples than this into memory, +# so this also limits the number of samples a query +# can return. +# --log.level=info Only log messages with the given severity or +# above. One of: [debug, info, warn, error] +# --log.format=logfmt Output format of log messages. One of: [logfmt, +# json] diff --git a/roles/prometheus/templates/etc/prometheus/alert_rules.yml.j2 b/roles/prometheus/templates/etc/prometheus/alert_rules.yml.j2 new file mode 100644 index 0000000..1ff8ea4 --- /dev/null +++ b/roles/prometheus/templates/etc/prometheus/alert_rules.yml.j2 @@ -0,0 +1,18 @@ +--- +{{ ansible_managed | comment }} + +groups: + +{% for group in alert_groups | default(jobs | map(attribute='group') | map('default', prometheus_job_default_group_name) | unique | list) %} + +- name: {{ group }} + rules: +{% for job in jobs %} +{% if job.group | default(prometheus_job_default_group_name) == group %} + +{{ job.alerts | to_nice_yaml(indent=2) | indent(2, first=true) }} + +{% endif %} +{% endfor %} + +{% endfor %} \ No newline at end of file diff --git a/roles/prometheus/templates/blackbox.yml b/roles/prometheus/templates/etc/prometheus/blackbox.yml.j2 similarity index 100% rename from roles/prometheus/templates/blackbox.yml rename to roles/prometheus/templates/etc/prometheus/blackbox.yml.j2 diff --git a/roles/prometheus/templates/etc/prometheus/prometheus.yml.j2 b/roles/prometheus/templates/etc/prometheus/prometheus.yml.j2 new file mode 100644 index 0000000..3ecdfec --- /dev/null +++ b/roles/prometheus/templates/etc/prometheus/prometheus.yml.j2 @@ -0,0 +1,80 @@ +--- +{{ ansible_managed | comment }} + +global: + scrape_interval: {{ prometheus_scrape_interval }} + evaluation_interval: {{ prometheus_evaluation_interval }} + external_labels: + monitor: {{ prometheus_monitor_name | default(ansible_hostname) }} + +alerting: + alertmanagers: + - static_configs: + {% if groups[prometheus_hostgroup_alertmanagers] | default([]) | length == 0 -%} + - targets: [] + {%- else -%} + - targets: + {% for host in groups[prometheus_hostgroup_alertmanagers] -%} + - 
"{{ hostvars[host].inventory_hostname }}:{{ hostvars[host].prometheus_alertmanager_port | default(prometheus_alertmanager_port) | int }}" + {%- endfor %} + {%- endif %} + +rule_files: + - /etc/prometheus/alert_rules.yml + +scrape_configs: + +{% for job in jobs %} + +- job_name: {{ job.name }} +{% if 'scrape_interval' in job %} + scrape_interval: {{ job.scrape_interval }} +{% endif %} +{% if 'blackbox' in job %} + metrics_path: /probe + relabel_configs: + - source_labels: + - __address__ + target_label: __param_target + - source_labels: + - __param_target + target_label: instance + - target_label: __address__ + replacement: localhost:9115 + params: + module: +{% for module in job.blackbox.modules.keys() %} + - {{ module }} +{% endfor %} +{% endif %} + +{% if 'scheme' in job %} + scheme: {{ job.scheme }} +{% endif %} + +{% if 'basic_auth_user' in job and 'basic_auth_password' in job %} + basic_auth: + username: {{ job.basic_auth_user }} + password: {{ job.basic_auth_password }} +{% endif %} + +{% if 'tls_config' in job %} + tls_config: + ca_file: {{ job.tls_config.ca_file }} + cert_file: {{ job.tls_config.cert_file }} + key_file: {{ job.tls_config.key_file }} + insecure_skip_verify: {{ job.tls_config.insecure_skip_verify | default(false) }} +{% endif %} + + static_configs: + - targets: +{% for host in groups['all'] %} +{% if hostvars[host]['monitor_' + job.name] | default(false) | bool %} +{%- set fstring = {} %} +{%- set _ = fstring.update(job.defaults|default({})) %} +{%- set _ = fstring.update(hostvars[host]) %} + - {{ job.target_format | format(**fstring) | format(**fstring) }} +{% endif %} +{% endfor %} + +{% endfor %} diff --git a/roles/prometheus/templates/etc/systemd/system/prometheus-blackbox-exporter.service.d/override.conf.j2 b/roles/prometheus/templates/etc/systemd/system/prometheus-blackbox-exporter.service.d/override.conf.j2 new file mode 100644 index 0000000..7f953dd --- /dev/null +++ 
b/roles/prometheus/templates/etc/systemd/system/prometheus-blackbox-exporter.service.d/override.conf.j2 @@ -0,0 +1,5 @@ +{{ ansible_managed | comment }} + +[Service] +AmbientCapabilities=CAP_NET_RAW +NoNewPrivileges=true diff --git a/roles/prometheus/templates/prometheus-alert-base.yml b/roles/prometheus/templates/prometheus-alert-base.yml deleted file mode 100644 index 5e12489..0000000 --- a/roles/prometheus/templates/prometheus-alert-base.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -{{ ansible_managed | comment }} - -groups: -- name: 'ccc-basel' - rules: diff --git a/roles/prometheus/templates/prometheus-base.yml b/roles/prometheus/templates/prometheus-base.yml deleted file mode 100644 index 38d3bf8..0000000 --- a/roles/prometheus/templates/prometheus-base.yml +++ /dev/null @@ -1,25 +0,0 @@ ---- -{{ ansible_managed | comment }} - -global: - scrape_interval: {{ prometheus_scrape_interval }} - evaluation_interval: {{ prometheus_evaluation_interval }} - external_labels: - monitor: {{ prometheus_monitor_name | default(ansible_hostname) }} - -alerting: - alertmanagers: - - static_configs: - {% if groups['alertmanagers'] | default([]) | length == 0 -%} - - targets: [] - {%- else -%} - - targets: - {% for host in groups['alertmanagers'] -%} - - "{{ hostvars[host].inventory_hostname }}:{{ hostvars[host].prometheus_alertmanager_port | default(prometheus_alertmanager_port) | int }}" - {%- endfor %} - {%- endif %} - -rule_files: - - /etc/prometheus/alert_rules.yml - -scrape_configs: diff --git a/roles/prometheus/templates/prometheus-blackbox-exporter-local.service b/roles/prometheus/templates/prometheus-blackbox-exporter-local.service deleted file mode 100644 index 35f2977..0000000 --- a/roles/prometheus/templates/prometheus-blackbox-exporter-local.service +++ /dev/null @@ -1,15 +0,0 @@ -[Unit] -Description=Blackbox exporter for Prometheus -Documentation=https://prometheus.io/docs/introduction/overview/ - -[Service] -Restart=always -User=prometheus 
-EnvironmentFile=/etc/default/prometheus-blackbox-exporter -ExecStart=/usr/local/bin/prometheus-blackbox-exporter $ARGS -ExecReload=/bin/kill -HUP $MAINPID -AmbientCapabilities=CAP_NET_RAW -NoNewPrivileges=true - -[Install] -WantedBy=multi-user.target \ No newline at end of file diff --git a/roles/prometheus/templates/prometheus-job.yml b/roles/prometheus/templates/prometheus-job.yml deleted file mode 100644 index 13c27f6..0000000 --- a/roles/prometheus/templates/prometheus-job.yml +++ /dev/null @@ -1,52 +0,0 @@ - -- job_name: {{ job.name }} - {% if 'scrape_interval' in job -%} - scrape_interval: {{ job.scrape_interval }} - {% endif -%} - {% if 'blackbox' in job -%} - metrics_path: /probe - relabel_configs: - - source_labels: - - __address__ - target_label: __param_target - - source_labels: - - __param_target - target_label: instance - - target_label: __address__ - replacement: localhost:9115 - params: - module: - {% for module in job.blackbox.modules.keys() -%} - - {{ module }} - {%- endfor %} - {% endif %} - - {% if 'scheme' in job -%} - scheme: {{ job.scheme }} - {% endif -%} - - {% if 'basic_auth_user' in job and 'basic_auth_password' in job -%} - basic_auth: - username: {{ job.basic_auth_user }} - password: {{ job.basic_auth_password }} - {% endif -%} - - {% if 'tls_config' in job -%} - tls_config: - ca_file: {{ job.tls_config.ca_file }} - cert_file: {{ job.tls_config.cert_file }} - key_file: {{ job.tls_config.key_file }} - insecure_skip_verify: {{ job.tls_config.insecure_skip_verify | default(false) }} - {% endif -%} - - static_configs: - - targets: - {% for host in groups['all'] -%} - {% if hostvars[host]['monitor_' + job.name] | default(false) | bool -%} - {%- set fstring = {} -%} - {%- set ignored = fstring.update(job.defaults|default({})) -%} - {%- set ignored = fstring.update(hostvars[host]) -%} - - {{ job.target_format | format(**fstring) | format(**fstring) }} - {% endif -%} - {% endfor -%} - diff --git a/roles/prometheus_node/tasks/install.yml 
b/roles/prometheus_node/tasks/install.yml index 297533b..7d4c012 100644 --- a/roles/prometheus_node/tasks/install.yml +++ b/roles/prometheus_node/tasks/install.yml @@ -7,7 +7,7 @@ paths: - tasks/install files: - - "{% if 'id' in ansible_lsb %}{{ ansible_lsb.id }}{% else %}{{ ansible_distribution }}{% endif %}_{% if 'major_release' in ansible_lsb %}{{ ansible_lsb.major_release }}{% else %}{{ ansible_distribution_version}}{% endif %}.yml" + - "{% if 'id' in ansible_lsb %}{{ ansible_lsb.id }}{% else %}{{ ansible_distribution }}{% endif %}_{% if 'major_release' in ansible_lsb %}{{ ansible_lsb.major_release }}{% else %}{{ ansible_distribution_version }}{% endif %}.yml" - "{% if 'id' in ansible_lsb %}{{ ansible_lsb.id }}{% else %}{{ ansible_distribution }}{% endif %}.yml" - "{{ ansible_distribution }}_{{ ansible_distribution_version }}.yml" - "{{ ansible_distribution }}.yml"