From d06406c9f4d1333484eaa2e0e5c7cf991986d602 Mon Sep 17 00:00:00 2001
From: polyfloyd <floyd@polyfloyd.net>
Date: Sun, 24 Sep 2023 15:26:04 +0200
Subject: [PATCH] Add monitoring role

---
 group_vars/monitoring.yaml                    | 37 ++++++++
 hosts.yaml                                    |  3 +
 monitoring.yaml                               |  6 ++
 roles/monitoring/defaults/main.yaml           | 11 +++
 roles/monitoring/handlers/main.yaml           | 17 ++++
 roles/monitoring/tasks/blackbox.yaml          |  4 +
 roles/monitoring/tasks/grafana.yaml           | 46 ++++++++++
 roles/monitoring/tasks/main.yaml              | 66 ++++++++++++++
 roles/monitoring/tasks/mqtt_exporter.yaml     | 46 ++++++++++
 roles/monitoring/tasks/prometheus.yaml        | 27 ++++++
 .../templates/grafana-data-source.yml         |  4 +
 roles/monitoring/templates/grafana-ldap.toml  | 85 +++++++++++++++++++
 roles/monitoring/templates/grafana.ini        | 85 +++++++++++++++++++
 .../templates/mqtt_exporter.service           | 14 +++
 .../templates/mqtt_exporter_config.yaml       | 25 ++++++
 roles/monitoring/templates/nginx-site.conf    | 45 ++++++++++
 roles/monitoring/templates/prometheus.yml     | 18 ++++
 17 files changed, 539 insertions(+)
 create mode 100644 group_vars/monitoring.yaml
 create mode 100644 monitoring.yaml
 create mode 100644 roles/monitoring/defaults/main.yaml
 create mode 100644 roles/monitoring/handlers/main.yaml
 create mode 100644 roles/monitoring/tasks/blackbox.yaml
 create mode 100644 roles/monitoring/tasks/grafana.yaml
 create mode 100644 roles/monitoring/tasks/main.yaml
 create mode 100644 roles/monitoring/tasks/mqtt_exporter.yaml
 create mode 100644 roles/monitoring/tasks/prometheus.yaml
 create mode 100644 roles/monitoring/templates/grafana-data-source.yml
 create mode 100644 roles/monitoring/templates/grafana-ldap.toml
 create mode 100644 roles/monitoring/templates/grafana.ini
 create mode 100644 roles/monitoring/templates/mqtt_exporter.service
 create mode 100644 roles/monitoring/templates/mqtt_exporter_config.yaml
 create mode 100644 roles/monitoring/templates/nginx-site.conf
 create mode 100644 roles/monitoring/templates/prometheus.yml

diff --git a/group_vars/monitoring.yaml b/group_vars/monitoring.yaml
new file mode 100644
index 0000000..80bdc83
--- /dev/null
+++ b/group_vars/monitoring.yaml
@@ -0,0 +1,37 @@
+monitoring_domain: monitoring.bitlair.nl
+monitoring_bootstrap_cert: false  # true makes nginx include snippets/snakeoil.conf instead of the dehydrated cert
+acme_domains:
+  - "{{ monitoring_domain }}"
+
+prometheus_scrape_configs:
+  - job_name: "node"
+    static_configs:
+      - targets:
+        - "localhost:9100"
+        - "bitlair.nl:9100"
+        - "git.bitlair.nl:9100"
+        - "mqtt.bitlair.nl:9100"
+        - "pad.bitlair.nl:9100"
+  - job_name: "mqtt"
+    static_configs:
+      - targets: ["localhost:{{ monitoring_mqtt_exporter_port }}"]  # same port the exporter config is rendered with
+  - job_name: "blackbox"
+    scrape_interval: 5m
+    metrics_path: /probe
+    params:
+      module: [http_2xx]  # Look for a HTTP 200 response.
+    static_configs:
+      - targets:
+        - https://bitlair.nl
+        - https://git.bitlair.nl
+        - https://pad.bitlair.nl
+          # Legacy
+        - https://dashboard.bitlair.nl
+        - https://wiki.bitlair.nl
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: "{{ blackbox_exporter_web_listen_address }}"
diff --git a/hosts.yaml b/hosts.yaml
index b7b9930..dbd5690 100644
--- a/hosts.yaml
+++ b/hosts.yaml
@@ -26,6 +26,9 @@ all:
         mqtt_internal:
           hosts:
             mqtt.bitlair.nl:
+        monitoring:
+          hosts:
+            monitoring.bitlair.nl:
         music:
           hosts:
             music.bitlair.nl:
diff --git a/monitoring.yaml b/monitoring.yaml
new file mode 100644
index 0000000..7ac7e32
--- /dev/null
+++ b/monitoring.yaml
@@ -0,0 +1,6 @@
+---
+- hosts: monitoring
+  roles:
+    - common
+    - acme
+    - monitoring
diff --git a/roles/monitoring/defaults/main.yaml b/roles/monitoring/defaults/main.yaml
new file mode 100644
index 0000000..11facb6
--- /dev/null
+++ b/roles/monitoring/defaults/main.yaml
@@ -0,0 +1,11 @@
+monitoring_mqtt_exporter_port: 9883
+prometheus_config_dir: /etc/prometheus
+prometheus_web_listen_address: "[::1]:9090"
+prometheus_storage_retention: 730d
+blackbox_exporter_web_listen_address: "[::1]:9115"
+grafana_url: "https://{{ monitoring_domain }}"
+grafana_domain: "{{ monitoring_domain }}"
+grafana_address: "::1"
+grafana_port: 9000
+grafana_admin_password: "{{ lookup('password', '/tmp/monitoring_grafana_password length=32') }}"
+grafana_data_dir: /var/lib/grafana
diff --git a/roles/monitoring/handlers/main.yaml b/roles/monitoring/handlers/main.yaml
new file mode 100644
index 0000000..ee9948d
--- /dev/null
+++ b/roles/monitoring/handlers/main.yaml
@@ -0,0 +1,17 @@
+---
+- import_tasks: ../../common/handlers/main.yaml
+
+- name: restart mqtt_exporter
+  systemd:
+    name: mqtt_exporter
+    state: restarted
+
+- name: restart prometheus
+  systemd:
+    name: prometheus
+    state: restarted
+
+- name: restart grafana
+  systemd:
+    name: grafana-server
+    state: restarted
diff --git a/roles/monitoring/tasks/blackbox.yaml b/roles/monitoring/tasks/blackbox.yaml
new file mode 100644
index 0000000..2385022
--- /dev/null
+++ b/roles/monitoring/tasks/blackbox.yaml
@@ -0,0 +1,4 @@
+---
+- name: Install blackbox exporter
+  apt:
+    name: prometheus-blackbox-exporter
diff --git a/roles/monitoring/tasks/grafana.yaml b/roles/monitoring/tasks/grafana.yaml
new file mode 100644
index 0000000..b730f17
--- /dev/null
+++ b/roles/monitoring/tasks/grafana.yaml
@@ -0,0 +1,46 @@
+---
+- name: Add key
+  get_url:
+    url: https://apt.grafana.com/gpg.key
+    dest: /etc/apt/keyrings/grafana.asc
+  notify: apt update
+
+- name: Grafana source
+  copy:
+    dest: /etc/apt/sources.list.d/grafana.list
+    content: "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main"
+  notify: apt update
+
+- meta: flush_handlers
+
+- name: Install Grafana
+  apt:
+    name: grafana
+
+- name: Configure grafana
+  template:
+    src: "{{ item.src }}"
+    dest: "{{ item.dest }}"
+    owner: root
+    group: grafana  # both files embed credentials: readable by the grafana service group, not by everyone
+    mode: "0640"
+  notify: restart grafana
+  with_items:
+    - { src: grafana.ini, dest: /etc/grafana/grafana.ini }
+    - { src: grafana-ldap.toml, dest: /etc/grafana/ldap.toml }
+
+- name: Configure grafana data source
+  template:
+    src: grafana-data-source.yml
+    dest: "/etc/grafana/provisioning/datasources/{{ item.name | lower }}.yaml"
+    owner: root
+    group: root
+    mode: "0644"
+  notify: restart grafana
+  with_items:  # each item is dumped as-is into the `datasources:` list of the template
+    - name: Prometheus
+      type: prometheus
+      access: proxy
+      url: 'http://{{ prometheus_web_listen_address }}/prometheus'
+      basicAuth: false
+      isDefault: true
diff --git a/roles/monitoring/tasks/main.yaml b/roles/monitoring/tasks/main.yaml
new file mode 100644
index 0000000..f629658
--- /dev/null
+++ b/roles/monitoring/tasks/main.yaml
@@ -0,0 +1,66 @@
+---
+- name: monitoring
+  tags: monitoring
+  block:
+    - name: Install dependencies
+      apt:
+        name: nginx
+        state: present
+
+    - name: Clear default nginx site
+      file:
+        state: absent
+        path: /etc/nginx/sites-enabled/default
+      notify: reload nginx
+
+    - name: Install nginx site
+      template:
+        src: nginx-site.conf
+        dest: /etc/nginx/sites-available/monitoring
+        owner: root
+        group: root
+        mode: "0644"
+      notify: reload nginx
+
+    - name: Enable nginx site
+      file:
+        src: /etc/nginx/sites-available/monitoring
+        dest: /etc/nginx/sites-enabled/monitoring
+        state: link
+      notify: reload nginx
+
+    - name: Start nginx
+      systemd:
+        name: nginx
+        state: started
+        enabled: true
+
+    - name: Allow HTTP/HTTPS
+      iptables:
+        chain: INPUT
+        protocol: tcp
+        destination_port: "{{ item.port }}"
+        ctstate: NEW
+        jump: ACCEPT
+        ip_version: "{{ item.ip }}"
+        action: insert
+      with_items:  # NOTE(review): only IPv6 rules are inserted, yet the nginx site also listens on IPv4 - confirm intended
+        - { ip: ipv6, port: 80 }
+        - { ip: ipv6, port: 443 }
+      notify: persist iptables
+
+- name: mqtt_exporter
+  tags: mqtt_exporter
+  import_tasks: mqtt_exporter.yaml
+
+- name: blackbox
+  tags: blackbox
+  import_tasks: blackbox.yaml
+
+- name: prometheus
+  tags: prometheus
+  import_tasks: prometheus.yaml
+
+- name: grafana
+  tags: grafana
+  import_tasks: grafana.yaml
diff --git a/roles/monitoring/tasks/mqtt_exporter.yaml b/roles/monitoring/tasks/mqtt_exporter.yaml
new file mode 100644
index 0000000..2fee482
--- /dev/null
+++ b/roles/monitoring/tasks/mqtt_exporter.yaml
@@ -0,0 +1,46 @@
+---
+- name: Clone source
+  git:
+    repo: https://github.com/polyfloyd/mqtt-exporter.git
+    # NOTE(review): follows a moving branch; pin a tag or commit for reproducible deploys
+    version: main
+    dest: /opt/mqtt_exporter
+  notify: restart mqtt_exporter
+
+- name: Install apt dependencies
+  apt:
+    name:
+      - python3-paho-mqtt
+      - python3-prometheus-client
+      - python3-yaml
+    state: present
+
+- name: Install service
+  template:
+    src: mqtt_exporter.service
+    dest: /etc/systemd/system/mqtt_exporter.service
+    owner: root
+    group: root
+    mode: "0644"
+  notify:
+    - daemon reload
+    - restart mqtt_exporter
+
+- name: Install config file
+  template:
+    src: mqtt_exporter_config.yaml
+    dest: /etc/mqtt_exporter.yaml
+    owner: root
+    group: root
+    mode: "0644"
+  # This file is passed to the exporter on its command line and is not a
+  # systemd unit, so restarting the service is enough; no daemon reload.
+  notify: restart mqtt_exporter
+
+- meta: flush_handlers
+
+- name: Start service
+  systemd:
+    name: mqtt_exporter
+    state: started
+    enabled: true
diff --git a/roles/monitoring/tasks/prometheus.yaml b/roles/monitoring/tasks/prometheus.yaml
new file mode 100644
index 0000000..f6c5865
--- /dev/null
+++ b/roles/monitoring/tasks/prometheus.yaml
@@ -0,0 +1,27 @@
+---
+- name: Install dependencies
+  apt:
+    name: prometheus
+
+- name: Configure Prometheus
+  template:
+    src: prometheus.yml
+    dest: "{{ prometheus_config_dir }}/prometheus.yml"
+    owner: root
+    group: root
+    mode: "0644"
+  notify: restart prometheus
+
+- name: Configure Prometheus args
+  lineinfile:
+    path: /etc/default/prometheus
+    line: >-
+      ARGS="
+      --storage.tsdb.retention.time={{ prometheus_storage_retention }}
+      --storage.tsdb.retention.size=0
+      --web.listen-address={{ prometheus_web_listen_address }}
+      --web.external-url=prometheus
+      --config.file={{ prometheus_config_dir }}/prometheus.yml
+      "
+    regexp: '^ARGS'
+  notify: restart prometheus
diff --git a/roles/monitoring/templates/grafana-data-source.yml b/roles/monitoring/templates/grafana-data-source.yml
new file mode 100644
index 0000000..6f9f460
--- /dev/null
+++ b/roles/monitoring/templates/grafana-data-source.yml
@@ -0,0 +1,4 @@
+apiVersion: 1
+deleteDatasources: []
+datasources:
+  {{ [item] | to_nice_yaml | indent(2) }}
diff --git a/roles/monitoring/templates/grafana-ldap.toml b/roles/monitoring/templates/grafana-ldap.toml
new file mode 100644
index 0000000..3e03e8d
--- /dev/null
+++ b/roles/monitoring/templates/grafana-ldap.toml
@@ -0,0 +1,85 @@
+# Managed by Ansible
+
+# Set to true to log user information returned from LDAP
+verbose_logging = true
+
+[[servers]]
+# Ldap server host (specify multiple hosts space separated)
+host = "ldap.bitlair.nl"
+# Default port is 389 or 636 if use_ssl = true
+port = 636
+# Set to true if ldap server supports TLS
+use_ssl = true
+# Set to true if connect ldap server with STARTTLS pattern (create connection in insecure, then upgrade to secure connection with TLS)
+start_tls = false
+# set to true if you want to skip ssl cert validation
+ssl_skip_verify = false
+# set to the path to your root CA certificate or leave unset to use system defaults
+# root_ca_cert = /path/to/certificate.crt
+
+# Search user bind dn
+bind_dn = "cn=grafana,ou=System,dc=bitlair,dc=nl"
+# Search user bind password; define grafana_ldap_bind_password in Ansible Vault, never commit it in plain text
+bind_password = '{{ grafana_ldap_bind_password }}'
+
+# User search filter, for example "(cn=%s)" or "(sAMAccountName=%s)" or "(uid=%s)"
+search_filter = "(uid=%s)"
+
+# An array of base dns to search through
+search_base_dns = ["dc=bitlair,dc=nl"]
+
+# In POSIX LDAP schemas, without memberOf attribute a secondary query must be made for groups.
+# This is done by enabling group_search_filter below. You must also set member_of= "cn"
+# in [servers.attributes] below.
+
+# Users with nested/recursive group membership and an LDAP server that supports LDAP_MATCHING_RULE_IN_CHAIN
+# can set group_search_filter, group_search_filter_user_attribute, group_search_base_dns and member_of
+# below in such a way that the user's recursive group membership is considered.
+#
+# Nested Groups + Active Directory (AD) Example:
+#
+#   AD groups store the Distinguished Names (DNs) of members, so your filter must
+#   recursively search your groups for the authenticating user's DN. For example:
+#
+#     group_search_filter = "(member:1.2.840.113556.1.4.1941:=%s)"
+#     group_search_filter_user_attribute = "distinguishedName"
+#     group_search_base_dns = ["ou=groups,dc=grafana,dc=org"]
+#
+#     [servers.attributes]
+#     ...
+#     member_of = "distinguishedName"
+
+## Group search filter, to retrieve the groups of which the user is a member (only set if memberOf attribute is not available)
+group_search_filter = "(&(objectClass=posixGroup)(memberUid=%s))"
+## Group search filter user attribute defines what user attribute gets substituted for %s in group_search_filter.
+## Defaults to the value of username in [server.attributes]
+## Valid options are any of your values in [servers.attributes]
+## If you are using nested groups you probably want to set this and member_of in
+## [servers.attributes] to "distinguishedName"
+# group_search_filter_user_attribute = "distinguishedName"
+## An array of the base DNs to search through for groups. Typically uses ou=groups
+group_search_base_dns = ["ou=Groups,dc=bitlair,dc=nl"]
+
+# Specify names of the ldap attributes your ldap uses
+[servers.attributes]
+name = "givenName"
+surname = "sn"
+username = "uid"
+email =  "mail"
+member_of = "cn"
+
+# Map ldap groups to grafana org roles
+[[servers.group_mappings]]
+group_dn = "Admins"
+org_role = "Admin"
+# The Grafana organization database id, optional, if left out the default org (id 1) will be used
+# org_id = 1
+
+[[servers.group_mappings]]
+group_dn = "Members"
+org_role = "Editor"
+
+[[servers.group_mappings]]
+# If you want to match all (or no ldap groups) then you can use wildcard
+group_dn = "*"
+org_role = "Viewer"
diff --git a/roles/monitoring/templates/grafana.ini b/roles/monitoring/templates/grafana.ini
new file mode 100644
index 0000000..3b2bbd0
--- /dev/null
+++ b/roles/monitoring/templates/grafana.ini
@@ -0,0 +1,85 @@
+# Managed by Ansible
+
+app_mode = production
+instance_name = monitoring
+
+# Directories
+[paths]
+data = {{ grafana_data_dir }}
+logs = /var/log/grafana
+plugins = {{ grafana_data_dir }}/plugins
+
+# HTTP options
+[server]
+http_addr = {{ grafana_address }}
+http_port = {{ grafana_port }}
+domain = {{ grafana_domain }}
+root_url = {{ grafana_url }}
+protocol = http
+enforce_domain = False
+socket =
+cert_key =
+cert_file =
+enable_gzip = False
+static_root_path = public
+router_logging = False
+serve_from_sub_path = False
+
+# Database
+[database]
+type = sqlite3
+
+# Remote cache
+[remote_cache]
+
+# Security
+[security]
+admin_user = admin
+admin_password = {{ grafana_admin_password }}
+
+# Users management and registration
+[users]
+allow_sign_up = False
+auto_assign_org_role = Viewer
+default_theme = dark
+
+[emails]
+welcome_email_on_sign_up = False
+
+# Analytics
+[analytics]
+reporting_enabled = "True"
+
+# Dashboards
+[dashboards]
+versions_to_keep = 20
+
+[dashboards.json]
+enabled = true
+path = {{ grafana_data_dir }}/dashboards
+
+# Alerting
+[alerting]
+enabled = true
+execute_alerts = True
+
+# SMTP and email config
+
+# Logging
+[log]
+mode = console, file
+level = info
+
+# Grafana.com configuration
+[grafana_com]
+url = https://grafana.com
+
+[auth.anonymous]
+enabled = true
+org_name = Bitlair
+org_role = Viewer
+
+[auth.ldap]
+enabled = true
+config_file = /etc/grafana/ldap.toml
+allow_sign_up = true
diff --git a/roles/monitoring/templates/mqtt_exporter.service b/roles/monitoring/templates/mqtt_exporter.service
new file mode 100644
index 0000000..25b0294
--- /dev/null
+++ b/roles/monitoring/templates/mqtt_exporter.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Prometheus exporter for MQTT
+After=network.target
+
+[Service]
+# Runs as a throw-away unprivileged user. No capabilities are requested: the
+Type=simple
+ExecStart=/opt/mqtt_exporter/mqtt-exporter.py /etc/mqtt_exporter.yaml
+Restart=always
+RestartSec=10
+DynamicUser=true
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/monitoring/templates/mqtt_exporter_config.yaml b/roles/monitoring/templates/mqtt_exporter_config.yaml
new file mode 100644
index 0000000..553883d
--- /dev/null
+++ b/roles/monitoring/templates/mqtt_exporter_config.yaml
@@ -0,0 +1,25 @@
+mqtt:
+  host: {{ mqtt_public_host }}
+  port: 1883
+
+prometheus:
+  port: {{ monitoring_mqtt_exporter_port }}
+
+export:
+  - subscribe: bitlair/#
+  - subscribe: bitlair/climate/+location/#
+  - subscribe: bitlair/wifi/+ssid/#
+  - subscribe: bitlair/state
+    value_map:
+      open: 1
+      closed: 0
+  - subscribe: bitlair/pos/product
+    metric_type: counter
+    labels:
+      product: enum
+  - subscribe: bitlair/collectd/bitlair-5406/snmp/if_octets-traffic.D15
+    metric_name: bitlair_internet_rx
+    value_regex: "^.+:(.+):"
+  - subscribe: bitlair/collectd/bitlair-5406/snmp/if_octets-traffic.D15
+    metric_name: bitlair_internet_tx
+    value_regex: "^.+:.+:([\\d\\.]+)"
diff --git a/roles/monitoring/templates/nginx-site.conf b/roles/monitoring/templates/nginx-site.conf
new file mode 100644
index 0000000..b2f21fc
--- /dev/null
+++ b/roles/monitoring/templates/nginx-site.conf
@@ -0,0 +1,45 @@
+server {
+	listen 443 ssl http2 default_server;
+	listen [::]:443 ssl http2;
+	server_name {{ monitoring_domain }};
+
+	{% if monitoring_bootstrap_cert %}
+	include "snippets/snakeoil.conf";
+	{% else %}
+	ssl_certificate     "/var/lib/dehydrated/certs/{{ monitoring_domain }}/fullchain.pem";
+	ssl_certificate_key "/var/lib/dehydrated/certs/{{ monitoring_domain }}/privkey.pem";
+	{% endif %}
+
+	add_header X-Robots-Tag noindex;
+
+	location / {
+		proxy_pass http://localhost:{{ grafana_port }}/;
+		include proxy_params;
+	}
+
+	location /prometheus/ {
+		proxy_pass http://{{ prometheus_web_listen_address }}/prometheus/;
+		include proxy_params;
+
+		{% for range in trusted_ranges %}
+		allow "{{ range.cidr }}";
+		{% endfor %}
+		allow "127.0.0.1";
+		allow "::1";
+		deny all;
+	}
+
+	include "snippets/acme.conf";
+}
+
+server {
+	listen 80 default_server;
+	listen [::]:80;
+	server_name {{ monitoring_domain }};
+
+	location / {
+		return 302 https://$server_name$request_uri;
+	}
+
+	include "snippets/acme.conf";
+}
diff --git a/roles/monitoring/templates/prometheus.yml b/roles/monitoring/templates/prometheus.yml
new file mode 100644
index 0000000..c512949
--- /dev/null
+++ b/roles/monitoring/templates/prometheus.yml
@@ -0,0 +1,18 @@
+# Managed by Ansible
+
+global:
+  evaluation_interval: 1m
+  scrape_interval: 1m
+
+  external_labels:
+    environment: monitoring
+
+rule_files:
+  - /etc/prometheus/rules/*.rules
+
+scrape_configs:
+  - job_name: "prometheus"
+    metrics_path: "/prometheus/metrics"
+    static_configs:
+      - targets: [ "localhost:9090" ]
+  {{ prometheus_scrape_configs | to_nice_yaml | indent(2) }}