Vagrant icinga / grafana

This commit is contained in:
benoit 2021-12-31 16:11:08 +01:00
parent 561c3ed9da
commit b1611e2f78
11 changed files with 2787 additions and 0 deletions

1
.gitignore vendored
View file

@ -1,4 +1,5 @@
*/__pycache__/
check_patroni.egg-info
test/*.state_file
test/vagrant/.vagrant
.*.swp

29
test/vagrant/LICENSE Normal file
View file

@ -0,0 +1,29 @@
BSD 3-Clause License
Copyright (c) 2019, Jehan-Guillaume (ioguix) de Rorthais
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
test/vagrant/Makefile Normal file
View file

@ -0,0 +1,22 @@
# Helper targets to drive the Vagrant test environment.
#
#   make / make prov  create (or re-provision) the VM(s)
#   make clean        destroy the VM(s) without confirmation
#   make validate     validate the Vagrantfile and lint the provision scripts

# Exported so that every vagrant invocation below sees them.
export VAGRANT_BOX_UPDATE_CHECK_DISABLE=1
export VAGRANT_CHECKPOINT_DISABLE=1

# None of these targets produce a file of the same name; declaring them all
# phony (including `clean`) keeps a stray file from masking the target.
.PHONY: all prov clean validate

all: prov

prov:
	vagrant up --provision

clean:
	vagrant destroy -f

validate:
	@vagrant validate
	@if command -v shellcheck >/dev/null ;\
	then shellcheck provision/* ;\
	else echo "WARNING: shellcheck is not in PATH, not checking bash syntax" ;\
	fi

127
test/vagrant/README.md Normal file
View file

@ -0,0 +1,127 @@
# Icinga
## Install
Create the VM:
```
make
```
## IcingaWeb
Configure Icingaweb :
```
http://$IP/icingaweb2/setup
```
* Screen 1: Welcome
Use the icinga token given at the end of the `icinga2-setup` provision, or:
```
sudo icingacli setup token show
```
Next
* Screen 2: Modules
Activate Monitor (already set)
Next
* Screen 3: Icinga Web 2
Next
* Screen 4: Authentication
Next
* Screen 5: Database Resource
Database Name: icingaweb_db
Username: supervisor
Password: th3Pass
Charset: UTF8
Validate
Next
* Screen 6: Authentication Backend
Next
* Screen 7: Administration
Fill the blanks
Next
* Screen 8: Application Configuration
Next
* Screen 9: Summary
Next
* Screen 10: Welcome ... again
Next
* Screen 11: Monitoring IDO Resource
Database Name: icinga2
Username: supervisor
Password: th3Pass
Charset: UTF8
Validate
Next
* Screen 12: Command Transport
Transport name: icinga2
Transport Type: API
Host: 127.0.0.1
Port: 5665
User: icinga_api
Password: th3Pass
Next
* Screen 13: Monitoring Security
Next
* Screen 14: Summary
Finish
* Screen 15: Hopefully success
Login
## Add servers to icinga
```
# Connect to the vm
vagrant ssh s1
# Create /etc/icinga2/conf.d/check_patroni.conf
sudo /vagrant/provision/director.bash init cluster1 p1=10.20.89.54 p2=10.20.89.55
# Check and load conf
sudo icinga2 daemon -C
sudo systemctl restart icinga2.service
```
# Grafana
Connect to: http://10.20.89.52:3000/login
User / pass: admin/admin
Import the dashboards from the `grafana` directory. They are created for cluster1,
and servers p1, p2.

62
test/vagrant/Vagrantfile vendored Normal file
View file

@ -0,0 +1,62 @@
require 'ipaddr'
#require 'yaml'

# Vagrant environment for the supervision node(s) used to test check_patroni.
# Each node listed in patroni_nodes, etcd_nodes and sup_nodes is assigned a
# consecutive private IP starting right after start_ip; only the sup_nodes
# are actually defined and provisioned here.

ENV["LC_ALL"] = 'en_US.utf8'

myBox = 'debian/buster64'
myProvider = 'libvirt'
pgver = 11 # NOTE(review): not referenced anywhere in this file - confirm it is still needed
start_ip = '10.20.89.51'
etcd_nodes = []
patroni_nodes = []
sup_nodes = ['s1']

Vagrant.configure(2) do |config|
  config.vm.provider myProvider

  # The first node gets the address following start_ip, the next node the
  # one after that, and so on.
  next_ip = IPAddr.new(start_ip).succ
  nodes_ips = {}
  ( patroni_nodes + etcd_nodes + sup_nodes ).each do |node|
    nodes_ips[node] = next_ip.to_s
    next_ip = next_ip.succ
  end

  # don't mind about insecure ssh key
  config.ssh.insert_key = false

  # https://vagrantcloud.com/search.
  config.vm.box = myBox

  # hardware and host settings
  config.vm.provider 'libvirt' do |lv|
    lv.cpus = 1
    lv.memory = 512
    lv.watchdog model: 'i6300esb'
    lv.default_prefix = 'patroni_'
    lv.qemu_use_session = false
  end

  # share the default folder through rsync (NFS is not working directly in DEBIAN 10)
  config.vm.synced_folder ".", "/vagrant", type: "rsync"

  # Share the check_patroni source tree with the VM. A relative host path is
  # resolved from the directory holding this Vagrantfile (test/vagrant), so
  # "../.." is the repository root whatever the location of the checkout.
  config.vm.synced_folder "../..", "/check_patroni", type: "rsync"

  ## allow root@vm to ssh to ssh_login@network_1
  #config.vm.synced_folder 'ssh', '/root/.ssh', type: 'rsync',
  #    owner: 'root', group: 'root',
  #    rsync__args: [ "--verbose", "--archive", "--delete", "--copy-links", "--no-perms" ]

  # system setup for sup nodes
  (sup_nodes).each do |node|
    config.vm.define node do |conf|
      conf.vm.network 'private_network', ip: nodes_ips[node]

      # Provisioners run in declaration order: icinga2 first, then the
      # check_patroni plugin installation.
      conf.vm.provision 'icinga2-setup', type: 'shell', path: 'provision/icinga2.bash',
        args: [ node ],
        preserve_order: true
      conf.vm.provision 'check_patroni', type: 'shell', path: 'provision/check_patroni.bash',
        preserve_order: true
    end
  end
end

View file

@ -0,0 +1,793 @@
{
"__inputs": [
{
"name": "DS_OPM",
"label": "opm",
"description": "",
"type": "datasource",
"pluginId": "postgres",
"pluginName": "PostgreSQL"
},
{
"name": "VAR_CLUSTER_NAME",
"type": "constant",
"label": "cluster_name",
"value": "cluster1",
"description": ""
}
],
"__elements": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.3.3"
},
{
"type": "datasource",
"id": "postgres",
"name": "PostgreSQL",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"iteration": 1640960519458,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 20,
"x": 0,
"y": 0
},
"id": 14,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$cluster_name' AND s.service = 'check_patroni_cluster_has_replica'\n ) \n AND m.label ilike '%lag%' \nGROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Cluster replica lag",
"type": "timeseries"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 4,
"x": 20,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$cluster_name' AND s.service = 'check_patroni_cluster_has_leader'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Cluster has primary",
"type": "stat"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 4,
"x": 20,
"y": 2
},
"id": 10,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$cluster_name' AND s.service = 'check_patroni_cluster_config_has_changed'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Cluster config has changed",
"type": "stat"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 4,
"x": 20,
"y": 4
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$cluster_name' AND s.service = 'check_patroni_cluster_is_in_maintenance'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Cluster is in maintenance",
"type": "stat"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "role_leader"
},
"properties": [
{
"id": "displayName",
"value": "leader"
}
]
},
{
"matcher": {
"id": "byName",
"options": "role_replica"
},
"properties": [
{
"id": "displayName",
"value": "replicas"
}
]
},
{
"matcher": {
"id": "byName",
"options": "state_running"
},
"properties": [
{
"id": "displayName",
"value": "running"
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 6
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"orientation": "vertical",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM public.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id \n FROM public.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$cluster_name' AND s.service = 'check_patroni_cluster_node_count'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Cluster node count",
"type": "stat"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "healthy_replica"
},
"properties": [
{
"id": "displayName",
"value": "healthy"
}
]
},
{
"matcher": {
"id": "byName",
"options": "unhealthy_replica"
},
"properties": [
{
"id": "displayName",
"value": "unhealthy"
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 6
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM public.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id \n FROM public.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$cluster_name' AND s.service = 'check_patroni_cluster_has_replica'\n )\n AND m.label IN('healthy_replica','unhealthy_replica') \n GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Cluster has replica",
"type": "timeseries"
}
],
"refresh": "",
"schemaVersion": 34,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"hide": 2,
"name": "cluster_name",
"query": "${VAR_CLUSTER_NAME}",
"skipUrlSync": false,
"type": "constant",
"current": {
"value": "${VAR_CLUSTER_NAME}",
"text": "${VAR_CLUSTER_NAME}",
"selected": false
},
"options": [
{
"value": "${VAR_CLUSTER_NAME}",
"text": "${VAR_CLUSTER_NAME}",
"selected": false
}
]
},
{
"auto": false,
"auto_count": 30,
"auto_min": "10s",
"current": {
"selected": true,
"text": "1m",
"value": "1m"
},
"hide": 0,
"name": "interval",
"options": [
{
"selected": true,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
},
{
"selected": false,
"text": "1h",
"value": "1h"
},
{
"selected": false,
"text": "6h",
"value": "6h"
},
{
"selected": false,
"text": "12h",
"value": "12h"
},
{
"selected": false,
"text": "1d",
"value": "1d"
},
{
"selected": false,
"text": "7d",
"value": "7d"
},
{
"selected": false,
"text": "14d",
"value": "14d"
},
{
"selected": false,
"text": "30d",
"value": "30d"
}
],
"query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"queryValue": "",
"refresh": 2,
"skipUrlSync": false,
"type": "interval"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Cluster status: cluster1",
"uid": "4BullO0nk",
"version": 10,
"weekStart": ""
}

View file

@ -0,0 +1,496 @@
{
"__inputs": [
{
"name": "DS_OPM",
"label": "opm",
"description": "",
"type": "datasource",
"pluginId": "postgres",
"pluginName": "PostgreSQL"
},
{
"name": "VAR_NODE_NAME",
"type": "constant",
"label": "node_name",
"value": "p1",
"description": ""
}
],
"__elements": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.3.3"
},
{
"type": "datasource",
"id": "postgres",
"name": "PostgreSQL",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"iteration": 1640961009033,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "is_primary"
},
"properties": [
{
"id": "displayName",
"value": "Primaire"
}
]
},
{
"matcher": {
"id": "byName",
"options": "is_replica"
},
"properties": [
{
"id": "displayName",
"value": "Secondaire"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_primary'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_replica'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "B",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Node type",
"type": "stat"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "is_alive"
},
"properties": [
{
"id": "displayName",
"value": "Node is alive"
}
]
},
{
"matcher": {
"id": "byName",
"options": "is_pending_restart"
},
"properties": [
{
"id": "displayName",
"value": "Node is pending restart"
}
]
},
{
"matcher": {
"id": "byName",
"options": "timeline"
},
"properties": [
{
"id": "displayName",
"value": "Current timeline"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_alive'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_tl_has_changed'\n )\nAND m.label = 'timeline'\nGROUP BY time, m.label ORDER BY time",
"refId": "B",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_pending_restart'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "D",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Health stats",
"type": "stat"
}
],
"schemaVersion": 34,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"hide": 2,
"name": "node_name",
"query": "${VAR_NODE_NAME}",
"skipUrlSync": false,
"type": "constant",
"current": {
"value": "${VAR_NODE_NAME}",
"text": "${VAR_NODE_NAME}",
"selected": false
},
"options": [
{
"value": "${VAR_NODE_NAME}",
"text": "${VAR_NODE_NAME}",
"selected": false
}
]
},
{
"auto": false,
"auto_count": 30,
"auto_min": "10s",
"current": {
"selected": false,
"text": "1m",
"value": "1m"
},
"hide": 0,
"name": "interval",
"options": [
{
"selected": true,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
},
{
"selected": false,
"text": "1h",
"value": "1h"
},
{
"selected": false,
"text": "6h",
"value": "6h"
},
{
"selected": false,
"text": "12h",
"value": "12h"
},
{
"selected": false,
"text": "1d",
"value": "1d"
},
{
"selected": false,
"text": "7d",
"value": "7d"
},
{
"selected": false,
"text": "14d",
"value": "14d"
},
{
"selected": false,
"text": "30d",
"value": "30d"
}
],
"query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"queryValue": "",
"refresh": 2,
"skipUrlSync": false,
"type": "interval"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Node status: p1",
"uid": "2LfUnFAnk",
"version": 1,
"weekStart": ""
}

View file

@ -0,0 +1,496 @@
{
"__inputs": [
{
"name": "DS_OPM",
"label": "opm",
"description": "",
"type": "datasource",
"pluginId": "postgres",
"pluginName": "PostgreSQL"
},
{
"name": "VAR_NODE_NAME",
"type": "constant",
"label": "node_name",
"value": "p2",
"description": ""
}
],
"__elements": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.3.3"
},
{
"type": "datasource",
"id": "postgres",
"name": "PostgreSQL",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"iteration": 1640960994907,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "is_primary"
},
"properties": [
{
"id": "displayName",
"value": "Primaire"
}
]
},
{
"matcher": {
"id": "byName",
"options": "is_replica"
},
"properties": [
{
"id": "displayName",
"value": "Secondaire"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_primary'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_replica'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "B",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Node type",
"type": "stat"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "is_alive"
},
"properties": [
{
"id": "displayName",
"value": "Node is alive"
}
]
},
{
"matcher": {
"id": "byName",
"options": "is_pending_restart"
},
"properties": [
{
"id": "displayName",
"value": "Node is pending restart"
}
]
},
{
"matcher": {
"id": "byName",
"options": "timeline"
},
"properties": [
{
"id": "displayName",
"value": "Current timeline"
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_alive'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "A",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_tl_has_changed'\n )\nAND m.label = 'timeline'\nGROUP BY time, m.label ORDER BY time",
"refId": "B",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_OPM}"
},
"format": "time_series",
"group": [],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": " SELECT $__timeGroup(timet, $interval) AS time, MAX(d.value), m.label AS metric\n FROM wh_nagios.metrics m,\nLATERAL wh_nagios.get_metric_data(m.id, $__timeFrom(), $__timeTo()) d\n WHERE m.id_service = (\n SELECT s.id FROM wh_nagios.services s \n JOIN public.servers h ON h.id=s.id_server\n WHERE h.hostname = '$node_name' AND s.service = 'check_patroni_node_is_pending_restart'\n ) GROUP BY time, m.label ORDER BY time",
"refId": "D",
"select": [
[
{
"params": [
"value"
],
"type": "column"
}
]
],
"timeColumn": "time",
"where": [
{
"name": "$__timeFilter",
"params": [],
"type": "macro"
}
]
}
],
"title": "Health stats",
"type": "stat"
}
],
"schemaVersion": 34,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"hide": 2,
"name": "node_name",
"query": "${VAR_NODE_NAME}",
"skipUrlSync": false,
"type": "constant",
"current": {
"value": "${VAR_NODE_NAME}",
"text": "${VAR_NODE_NAME}",
"selected": false
},
"options": [
{
"value": "${VAR_NODE_NAME}",
"text": "${VAR_NODE_NAME}",
"selected": false
}
]
},
{
"auto": false,
"auto_count": 30,
"auto_min": "10s",
"current": {
"selected": false,
"text": "1m",
"value": "1m"
},
"hide": 0,
"name": "interval",
"options": [
{
"selected": true,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
},
{
"selected": false,
"text": "1h",
"value": "1h"
},
{
"selected": false,
"text": "6h",
"value": "6h"
},
{
"selected": false,
"text": "12h",
"value": "12h"
},
{
"selected": false,
"text": "1d",
"value": "1d"
},
{
"selected": false,
"text": "7d",
"value": "7d"
},
{
"selected": false,
"text": "14d",
"value": "14d"
},
{
"selected": false,
"text": "30d",
"value": "30d"
}
],
"query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"queryValue": "",
"refresh": 2,
"skipUrlSync": false,
"type": "interval"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Node status: p2",
"uid": "2LfUnFAnkr",
"version": 1,
"weekStart": ""
}

View file

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Print one message line on standard output.
#   $1: text to display
info (){
    local message="$1"
    echo "$message"
}
# Stop at the first failing command, on any use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

info "#============================================================================="
info "# check_patroni"
info "#============================================================================="

# Tooling required to install check_patroni from its source tree.
# apt-get is used instead of apt: apt warns that its CLI is not stable for
# use in scripts.
DEBIAN_FRONTEND=noninteractive apt-get install -q -y git python3-pip
pip3 install --upgrade pip

# Install the plugin from the sources found in /check_patroni.
cd /check_patroni
pip3 install .

# Expose the plugin in the nagios plugin directory. -f/-n replace an existing
# link so that re-running the script does not abort here (plain "ln -s" fails
# when the link already exists, and errexit is enabled).
ln -sfn /usr/local/bin/check_patroni /usr/lib/nagios/plugins/check_patroni

# Smoke test: the command must be on PATH and runnable.
check_patroni --version

View file

@ -0,0 +1,392 @@
#!/usr/bin/env bash
# Write a single informational line to stdout.
#   $1: the line to print
info(){
    local line="$1"
    echo "$line"
}
# Print the command synopsis and a description of each argument on stdout.
usage(){
    cat << __EOF__
$0 ACTION CLUSTER_NAME [NODE..]

 ACTION: init | add
 CLUSTER: cluster name
 NODE: HOST=IP
 HOST: any name for icinga
 IP: the IP
__EOF__
}
# Refuse to run with three arguments or fewer, i.e. at least ACTION,
# CLUSTER_NAME and two NODE arguments must be given.
# NOTE(review): the synopsis printed by usage() shows NODE as optional, which
# does not match this check -- confirm which one is intended.
if (( $# <= 3 )); then
    usage
    exit 1
fi

# Positional arguments: the action, the cluster name, then every remaining
# argument is a HOST=IP node description.
ACTION="$1"; shift
CLUSTER="$1"; shift
NODES=( "$@" )

# Icinga 2 configuration file written by init() and appended to by
# add_hosts() / add_cluster().
TARGET="/etc/icinga2/conf.d/check_patroni.conf"

#set -o errexit
set -o nounset -o pipefail
# (Re)create $TARGET with the static part of the Icinga 2 configuration:
# the check_patroni CheckCommand objects, the Service apply rules, and the
# host groups / host templates for patroni servers and patroni clusters.
#
# The heredoc delimiter is quoted, so nothing in the template is expanded by
# the shell: the $name$ tokens reach the file verbatim.
# Any previous content of $TARGET is overwritten; host objects are appended
# afterwards by add_hosts() and add_cluster().
# NOTE(review): the last banner of the template repeats "Hosts meta".
init(){
    cat > "$TARGET" << '__EOF__'
# ===================================================================
# Check Commands
# ===================================================================
template CheckCommand "check_patroni" {
command = [ PluginDir + "/check_patroni" ]
arguments = {
"--endpoints" = {
value = "$endpoints$"
order = -2
repeat_key = true
}
"--timeout" = {
value = "$timeout$"
order = -1
}
}
}
object CheckCommand "check_patroni_node_is_alive" {
import "check_patroni"
arguments += {
"node_is_alive" = {
order = 1
}
}
}
object CheckCommand "check_patroni_node_is_primary" {
import "check_patroni"
arguments += {
"node_is_primary" = {
order = 1
}
}
}
object CheckCommand "check_patroni_node_is_replica" {
import "check_patroni"
arguments += {
"node_is_replica" = {
order = 1
}
}
}
object CheckCommand "check_patroni_node_is_pending_restart" {
import "check_patroni"
arguments += {
"node_is_pending_restart" = {
order = 1
}
}
}
object CheckCommand "check_patroni_node_patroni_version" {
import "check_patroni"
arguments += {
"node_patroni_version" = {
order = 1
}
"--patroni-version" = {
value = "$patroni_version$"
order = 2
}
}
}
object CheckCommand "check_patroni_node_tl_has_changed" {
import "check_patroni"
arguments += {
"node_tl_has_changed" = {
order = 1
}
"--state-file" = {
value = "/tmp/$state_file$" # a quick and dirty way for this poc
order = 2
}
}
}
# -------------------------------------------------------------------
object CheckCommand "check_patroni_cluster_has_leader" {
import "check_patroni"
arguments += {
"cluster_has_leader" = {
order = 1
}
}
}
object CheckCommand "check_patroni_cluster_has_replica" {
import "check_patroni"
arguments += {
"cluster_has_replica" = {
order = 1
}
"--warning" = {
value = "$has_replica_warning$"
order = 2
}
"--critical" = {
value = "$has_replica_critical$"
order = 3
}
}
}
object CheckCommand "check_patroni_cluster_config_has_changed" {
import "check_patroni"
arguments += {
"cluster_config_has_changed" = {
order = 1
}
"--state-file" = {
value = "/tmp/$state_file$" # a quick and dirty way for this poc
order = 2
}
}
}
object CheckCommand "check_patroni_cluster_is_in_maintenance" {
import "check_patroni"
arguments += {
"cluster_is_in_maintenance" = {
order = 1
}
}
}
object CheckCommand "check_patroni_cluster_node_count" {
import "check_patroni"
arguments += {
"cluster_node_count" = {
order = 1
}
"--warning" = {
value = "$node_count_warning$"
order = 2
}
"--critical" = {
value = "$node_count_critical$"
order = 3
}
"--running-warning" = {
value = "$node_count_running_warning$"
order = 4
}
"--running-critical" = {
value = "$node_count_running_critical$"
order = 5
}
}
}
# ===================================================================
# Services
# ===================================================================
template Service "check_patroni" {
max_check_attempts = 3
check_interval = 1m # we spam a little for the sake of testing
retry_interval = 15 # we spam a little for the sake of testing
enable_perfdata = true
vars.timeout = 10
}
apply Service "check_patroni_node_is_alive" {
import "check_patroni"
check_command = "check_patroni_node_is_alive"
assign where "patroni_servers" in host.groups
}
apply Service "check_patroni_node_is_primary" {
import "check_patroni"
check_command = "check_patroni_node_is_primary"
assign where "patroni_servers" in host.groups
}
apply Service "check_patroni_node_is_replica" {
import "check_patroni"
check_command = "check_patroni_node_is_replica"
assign where "patroni_servers" in host.groups
}
apply Service "check_patroni_node_is_pending_restart" {
import "check_patroni"
check_command = "check_patroni_node_is_pending_restart"
assign where "patroni_servers" in host.groups
}
apply Service "check_patroni_node_patroni_version" {
import "check_patroni"
check_command = "check_patroni_node_patroni_version"
assign where "patroni_servers" in host.groups
}
apply Service "check_patroni_node_tl_has_changed" {
import "check_patroni"
vars.state_file = host.name + ".state"
check_command = "check_patroni_node_tl_has_changed"
assign where "patroni_servers" in host.groups
}
# -------------------------------------------------------------------
apply Service "check_patroni_cluster_has_leader" {
import "check_patroni"
check_command = "check_patroni_cluster_has_leader"
assign where "patroni_clusters" in host.groups
}
apply Service "check_patroni_cluster_has_replica" {
import "check_patroni"
check_command = "check_patroni_cluster_has_replica"
assign where "patroni_clusters" in host.groups
}
apply Service "check_patroni_cluster_config_has_changed" {
import "check_patroni"
vars.state_file = host.name + ".state"
check_command = "check_patroni_cluster_config_has_changed"
assign where "patroni_clusters" in host.groups
}
apply Service "check_patroni_cluster_is_in_maintenance" {
import "check_patroni"
check_command = "check_patroni_cluster_is_in_maintenance"
assign where "patroni_clusters" in host.groups
}
apply Service "check_patroni_cluster_node_count" {
import "check_patroni"
check_command = "check_patroni_cluster_node_count"
assign where "patroni_clusters" in host.groups
}
# ===================================================================
# Hosts meta
# ===================================================================
object HostGroup "patroni_servers" {
display_name = "patroni servers"
}
template Host "patroni_servers" {
groups = [ "patroni_servers" ]
check_command = "hostalive"
vars.patroni_version = "2.1.2"
}
# -------------------------------------------------------------------
object HostGroup "patroni_clusters" {
display_name = "patroni clusters"
}
template Host "patroni_clusters" {
groups = [ "patroni_clusters" ]
check_command = "dummy"
}
# ===================================================================
# Hosts meta
# ===================================================================
__EOF__
}
# Append one Icinga 2 "Host" object per node to $TARGET.
#   $@: node descriptions, each of the form HOST=IP
#
# The node list is taken from the function arguments only. Previously the
# function assigned "$@" to the global NODES variable, which merely
# overwrote element 0 of the global array, and the callers passed "$NODES"
# (element 0 only): every node was processed only because the loop kept
# reading the untouched global array.
add_hosts(){
    local node host ip
    for node in "$@"; do
        ip="${node##*=}"      # text after the last "="
        host="${node%=*}"     # text before the last "="
        cat << __EOF__ >> "$TARGET"
object Host "$host" {
import "patroni_servers"
display_name = "Server patroni $host"
address = "$ip"
vars.endpoints = [ "http://" + address + ":8008" ]
}
__EOF__
    done
}

# Append the Icinga 2 "Host" object representing the whole cluster to $TARGET.
#   $1:  cluster name
#   $2+: node descriptions, each of the form HOST=IP
#
# The object lists the name of every node in its display name and one
# "http://IP:8008" endpoint per node, followed by the thresholds used by the
# cluster-level checks.
add_cluster(){
    local cluster="$1"
    shift

    local node
    local names=""
    local endpoints=" "
    for node in "$@"; do
        names="$names ${node%=*}"
        endpoints="$endpoints\"http://${node##*=}:8008\", "
    done

    cat << __EOF__ >> "$TARGET"
object Host "$cluster" {
import "patroni_clusters"
display_name = "Cluster: $cluster ($names )"
vars.endpoints = [$endpoints ]
vars.has_replica_warning = "1:"
vars.has_replica_critical = "1:"
vars.node_count_warning = "2:"
vars.node_count_critical = "1:"
vars.node_count_running_warning = "2:"
vars.node_count_running_critical = "1:"
}
__EOF__
}

# Dispatch on the requested action. "init" rewrites the static configuration
# first; both actions then append the host and cluster objects. The whole
# NODES array is passed explicitly to the helpers.
case "$ACTION" in
    "init")
        init
        add_hosts "${NODES[@]}"
        add_cluster "$CLUSTER" "${NODES[@]}"
        ;;
    "add")
        add_hosts "${NODES[@]}"
        add_cluster "$CLUSTER" "${NODES[@]}"
        ;;
    *)
        usage
        echo "error: invalid action"
        exit 1
        ;;
esac

View file

@ -0,0 +1,347 @@
#!/usr/bin/env bash
# Echo one message line to stdout.
#   $1: message text
info (){
    local text="$1"
    echo "$text"
}
# errexit is left disabled (commented out): a failing step does not abort
# the rest of the provisioning.
#set -o errexit
set -o nounset
set -o pipefail

# First argument: the name given to this machine (see set_hostname).
# A missing argument is reported with an explicit usage message instead of
# the bare "unbound variable" error raised by nounset.
NODENAME="${1:?usage: $0 NODENAME}"
shift

# PostgreSQL roles and passwords used by each component.
# When a component's role name equals PG_ICINGA_USER_NAME, that component's
# setup function skips the CREATE ROLE statement and reuses the icinga role.
PG_ICINGA_USER_NAME="supervisor"
PG_ICINGA_USER_PWD="th3Pass"
PG_ICINGAWEB_USER_NAME="supervisor"
PG_ICINGAWEB_USER_PWD="th3Pass"
PG_DIRECTOR_USER_NAME="supervisor"
PG_DIRECTOR_USER_PWD="th3Pass"
PG_OPM_USER_NAME="opm"
PG_OPM_USER_PWD="th3Pass"
PG_GRAFANA_USER_NAME="supervisor"
PG_GRAFANA_USER_PWD="th3Pass"
# Set the machine hostname to $NODENAME and append that name to the
# "127.0.0.1 localhost" line of /etc/hosts.
set_hostname(){
    local rule="#============================================================================="
    info "$rule"
    info "# hostname and /etc/hosts setup"
    info "$rule"

    hostnamectl set-hostname "${NODENAME}"

    # Only a line ending in "127.0.0.1 <blanks> localhost" is rewritten.
    local hosts_edit="s/\(127\.0\.0\.1\s*localhost$\)/\1 ${NODENAME}/"
    sed -i -e "$hosts_edit" /etc/hosts
}
# Declare the Icinga, Grafana and PostgreSQL (PGDG) apt repositories, import
# their signing keys, install the required packages, and enable the
# PostgreSQL 14 service.
packages(){
    local rule="#============================================================================="
    info "$rule"
    info "# install required repos and packages"
    info "$rule"

    # A failing "update" is tolerated on purpose (|| true).
    apt-get update || true
    apt-get -y install apt-transport-https wget gnupg software-properties-common

    # Text found between parentheses on the VERSION= line of /etc/os-release.
    DIST=$(awk -F"[)(]+" '/VERSION=/ {print $2}' /etc/os-release)

    # Repository definitions.
    {
        echo "deb https://packages.icinga.com/debian icinga-${DIST} main"
        echo "deb-src https://packages.icinga.com/debian icinga-${DIST} main"
    } > "/etc/apt/sources.list.d/${DIST}-icinga.list"
    echo "deb https://packages.grafana.com/oss/deb stable main" > /etc/apt/sources.list.d/grafana.list
    echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list

    # Repository signing keys.
    wget -q -O - https://packages.icinga.com/icinga.key | apt-key add -
    wget -q -O - https://packages.grafana.com/gpg.key | sudo apt-key add -
    wget -q -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -

    # Refresh the package index now that the new repositories are declared.
    apt-get update || true

    PACKAGES=(
        grafana
        icinga2 icinga2-ido-pgsql icingaweb2 icingaweb2-module-monitoring icingacli
        postgresql-client postgresql-14
        php7.3-pgsql php7.3-imagick php7.3-intl
        nagios-plugins
    )
    DEBIAN_FRONTEND=noninteractive apt install -q -y "${PACKAGES[@]}"

    systemctl --quiet --now enable postgresql@14
}
# Prepare Icinga 2: (re)create the IDO role and database, load the IDO
# schema, initialise the icingaweb2 configuration directory and setup token,
# write the ido-pgsql feature configuration, enable the features used here,
# and set the node up as a master.
icinga_setup(){
    info "#============================================================================="
    info "# Icinga setup"
    info "#============================================================================="

    ## This part is already done by the standard icinga install with the user
    ## icinga2 and a random password; here we don't really care.
    # The role is dropped and created under the same configured name
    # (PG_ICINGA_USER_NAME) so both statements always target the same role.
    cat << __EOF__ | sudo -u postgres psql
DROP ROLE IF EXISTS ${PG_ICINGA_USER_NAME};
DROP DATABASE IF EXISTS icinga2;
CREATE ROLE ${PG_ICINGA_USER_NAME} WITH LOGIN SUPERUSER PASSWORD '${PG_ICINGA_USER_PWD}';
CREATE DATABASE icinga2;
__EOF__

    # Password file used below to load the schema over TCP without a prompt.
    echo "*:*:*:${PG_ICINGA_USER_NAME}:${PG_ICINGA_USER_PWD}" > ~postgres/.pgpass
    chown postgres:postgres ~postgres/.pgpass
    chmod 600 ~postgres/.pgpass
    PGPASSFILE=~postgres/.pgpass psql -U "$PG_ICINGA_USER_NAME" -h 127.0.0.1 -d icinga2 -f /usr/share/icinga2-ido-pgsql/schema/pgsql.sql

    icingacli setup config directory --group icingaweb2
    icingacli setup token create

    ## This part is already done by the standard icinga install with the user
    ## icinga2.
    cat << __EOF__ > /etc/icinga2/features-available/ido-pgsql.conf
/**
* The db_ido_pgsql library implements IDO functionality
* for PostgreSQL.
*/
library "db_ido_pgsql"
object IdoPgsqlConnection "ido-pgsql" {
user = "${PG_ICINGA_USER_NAME}",
password = "${PG_ICINGA_USER_PWD}",
host = "localhost",
database = "icinga2"
}
__EOF__

    icinga2 feature enable ido-pgsql
    icinga2 feature enable command
    icinga2 feature enable perfdata

    #icinga2 node wizard
    # NOTE(review): the common name is hard-coded to "s1" rather than taken
    # from NODENAME -- confirm this is intended.
    icinga2 node setup --master --cn s1 --zone master
    systemctl restart icinga2.service
}
# Enable the Icinga 2 API, append an "icingaapi" ApiUser with every
# permission to conf.d/api-users.conf, then restart Icinga 2.
icinga_API(){
    local rule="#============================================================================="
    info "$rule"
    info "# Icinga API"
    info "$rule"

    icinga2 api setup

    # The user definition is appended: existing users in the file are kept.
    cat >> /etc/icinga2/conf.d/api-users.conf <<__EOF__
object ApiUser "icingaapi" {
password = "th3Pass"
permissions = [ "*" ]
}
__EOF__

    systemctl restart icinga2.service
}
# Prepare Icinga Web 2: create its database (and its role when it differs
# from the icinga role), set the PHP timezone, and configure Apache.
icinga_web(){
    local rule="#============================================================================="
    info "$rule"
    info "# Icinga2 Web"
    info "$rule"

    # A dedicated role is only needed when it is not the icinga role, which
    # icinga_setup has already created.
    if [[ "$PG_ICINGA_USER_NAME" != "$PG_ICINGAWEB_USER_NAME" ]]; then
        sudo -u postgres psql -c "CREATE ROLE ${PG_ICINGAWEB_USER_NAME} WITH LOGIN PASSWORD '${PG_ICINGAWEB_USER_PWD}';"
    fi
    sudo -u postgres psql -c "CREATE DATABASE icingaweb_db OWNER ${PG_ICINGAWEB_USER_NAME};"

    # Replace the commented-out date.timezone entry with an explicit value.
    sed -i -e "s/;date\.timezone =/date.timezone = europe\/paris/" /etc/php/7.3/apache2/php.ini

    # Apache: icingaweb2 configuration, URL rewriting, and PHP 7.3 in place
    # of the mpm_event module.
    a2enconf icingaweb2
    a2enmod rewrite
    a2dismod mpm_event
    a2enmod php7.3
    systemctl restart apache2
}
# Install the Icinga Director web module and its "incubator" prerequisite,
# create the director database, and register/start the director service.
# The main sequence of this script does not call this function.
director(){
    local rule="#============================================================================="
    info "$rule"
    info "# Icinga director"
    info "$rule"

    # Create the database
    if [[ "$PG_ICINGA_USER_NAME" != "$PG_DIRECTOR_USER_NAME" ]]; then
        sudo -u postgres psql -c "CREATE ROLE ${PG_DIRECTOR_USER_NAME} WITH LOGIN PASSWORD '${PG_DIRECTOR_USER_PWD}';"
    fi
    sudo -u postgres psql -c "CREATE DATABASE director_db OWNER ${PG_DIRECTOR_USER_NAME};"
    sudo -iu postgres psql -d director_db -c "CREATE EXTENSION pgcrypto;"

    ## Prereq
    MODULE_NAME=incubator
    MODULE_VERSION=v0.11.0
    MODULES_PATH="/usr/share/icingaweb2/modules"
    MODULE_PATH="${MODULES_PATH}/${MODULE_NAME}"
    RELEASES="https://github.com/Icinga/icingaweb2-module-${MODULE_NAME}/archive"
    # The archive is only fetched when the module directory could be created.
    if mkdir "$MODULE_PATH"; then
        wget -q "${RELEASES}/${MODULE_VERSION}.tar.gz" -O - \
            | tar xfz - -C "$MODULE_PATH" --strip-components 1
    fi
    icingacli module enable "${MODULE_NAME}"

    ## Director
    MODULE_VERSION="1.8.1"
    ICINGAWEB_MODULEPATH="/usr/share/icingaweb2/modules"
    REPO_URL="https://github.com/icinga/icingaweb2-module-director"
    TARGET_DIR="${ICINGAWEB_MODULEPATH}/director"
    URL="${REPO_URL}/archive/v${MODULE_VERSION}.tar.gz"

    # System account and home directory for the director service.
    useradd -r -g icingaweb2 -d /var/lib/icingadirector -s /bin/false icingadirector
    install -d -o icingadirector -g icingaweb2 -m 0750 /var/lib/icingadirector

    # Unpack the module, then install the systemd unit shipped with it.
    install -d -m 0755 "${TARGET_DIR}"
    wget -q -O - "$URL" | tar xfz - -C "${TARGET_DIR}" --strip-components 1
    cp "${TARGET_DIR}/contrib/systemd/icinga-director.service" /etc/systemd/system/

    icingacli module enable director
    systemctl daemon-reload
    systemctl enable icinga-director.service
    systemctl start icinga-director.service

    # The permission have to be like this to let icingaweb activate modules
    chown -R www-data:icingaweb2 /etc/icingaweb2
}
# Create the Grafana database (and its role when it differs from the icinga
# role), replace /etc/grafana/grafana.ini with a [database] section pointing
# at it, then enable and start grafana-server.
grafana(){
    local rule="#============================================================================="
    info "$rule"
    info "# Grafana"
    info "$rule"

    if [[ "$PG_ICINGA_USER_NAME" != "$PG_GRAFANA_USER_NAME" ]]; then
        sudo -u postgres psql -c "CREATE ROLE ${PG_GRAFANA_USER_NAME} WITH LOGIN PASSWORD '${PG_GRAFANA_USER_PWD}';"
    fi
    sudo -u postgres psql -c "CREATE DATABASE grafana OWNER ${PG_GRAFANA_USER_NAME};"

    # The whole file is overwritten: only the [database] section remains.
    cat > /etc/grafana/grafana.ini << __EOF__
[database]
# You can configure the database connection by specifying type, host, name, user and password
# as seperate properties or as on string using the url propertie.
# Either "mysql", "postgres" or "sqlite3", it's your choice
type = postgres
host = 127.0.0.1:5432
name = grafana
user = $PG_GRAFANA_USER_NAME
password = $PG_GRAFANA_USER_PWD
__EOF__

    systemctl --quiet --now enable grafana-server.service
}
# Install OPM (opm-core and the wh_nagios warehouse) from source, create its
# database, install the perfdata dispatcher service and the dispatch_record
# timer, then grant read access on the OPM data to the Grafana role.
opm(){
    info "#============================================================================="
    info "# OPM"
    info "#============================================================================="

    ## OPM Install
    # The server development package must match the PostgreSQL version
    # installed by packages() (postgresql-14) so that "make install" puts the
    # extensions where that server looks for them.
    DEBIAN_FRONTEND=noninteractive apt install -q -y postgresql-server-dev-14 libdbd-pg-perl git build-essential
    cd /usr/local/src || exit 1
    git clone https://github.com/OPMDG/opm-core.git
    git clone https://github.com/OPMDG/opm-wh_nagios.git
    cd /usr/local/src/opm-wh_nagios/pg/ || exit 1
    make install
    cd /usr/local/src/opm-core/pg/ || exit 1
    make install

    ## OPM db setup
    cat << __EOF__ | sudo -iu postgres psql
CREATE ROLE ${PG_OPM_USER_NAME} WITH LOGIN PASSWORD '${PG_OPM_USER_PWD}';
CREATE DATABASE opm OWNER ${PG_OPM_USER_NAME};
__EOF__
    cat << __EOF__ | sudo -iu postgres psql -d opm
CREATE EXTENSION opm_core;
CREATE EXTENSION wh_nagios CASCADE;
SELECT * FROM grant_dispatcher('wh_nagios', 'opm');
__EOF__

    ## OPM dispatcher
    # Unquoted heredoc: the db_user / db_password values are expanded here.
    cat <<EOF > /etc/opm_dispatcher.conf
daemon=0
directory=/var/spool/icinga2/perfdata
frequency=5
db_connection_string=dbi:Pg:dbname=opm host=localhost
db_user=${PG_OPM_USER_NAME}
db_password=${PG_OPM_USER_PWD}
debug=0
syslog=1
hostname_filter = /^$/ # Empty hostname. Never happens
service_filter = /^$/ # Empty service
label_filter = /^$/ # Empty label
EOF
    cat <<'EOF' > /etc/systemd/system/opm_dispatcher.service
[Unit]
Description=dispatcher nagios, import perf files from icinga to opm
[Service]
User=nagios
Group=nagios
ExecStart=/usr/local/src/opm-wh_nagios/bin/nagios_dispatcher.pl -c /etc/opm_dispatcher.conf
# start right after boot
Type=simple
# restart on crash
Restart=always
# after 10s
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF

    ## OPM planned task
    cat <<'EOF' > /etc/systemd/system/opm_dispatch_record.service
[Unit]
Description=Run wh_nagios.dispatch_record() on OPM database
[Service]
Type=oneshot
User=postgres
Group=postgres
SyslogIdentifier=opm_dispatch_record
ExecStart=/usr/bin/psql -U postgres -d opm -c "SELECT * FROM wh_nagios.dispatch_record()"
EOF
    cat <<'EOF' > /etc/systemd/system/opm_dispatch_record.timer
[Unit]
Description=Timer to run wh_nagios.dispatch_record() on OPM
[Timer]
OnBootSec=60s
OnUnitInactiveSec=1min
[Install]
WantedBy=timers.target
EOF
    systemctl daemon-reload
    systemctl enable opm_dispatcher
    systemctl start opm_dispatcher
    systemctl enable opm_dispatch_record.timer
    systemctl start opm_dispatch_record.timer

    ## To check once everything is setup (icingaweb is setup)
    # sudo journalctl -fu opm_dispatcher
    # sudo journalctl -ft opm_dispatch_record

    ## Grants for grafana
    # NOTE(review): a role literally named "grafana" is created here, while
    # the grants below go to PG_GRAFANA_USER_NAME -- confirm which role the
    # Grafana data source is meant to use.
    sudo -iu postgres psql -c "CREATE ROLE grafana WITH LOGIN PASSWORD 'th3Pass'"

    # The grants use PG_GRAFANA_USER_NAME, the variable defined at the top of
    # the script. The previous spelling (PG_GRAPHANA_...) was never defined
    # and made the script abort here because nounset is enabled.
    # NOTE(review): nothing in this script creates the pr_dalibo schema, so
    # the pr_dalibo statements presumably fail on a fresh install -- confirm.
    cat <<EOQ | sudo -iu postgres psql -d opm
GRANT CONNECT ON DATABASE opm TO ${PG_GRAFANA_USER_NAME};
GRANT SELECT ON ALL TABLES IN SCHEMA public,wh_nagios TO ${PG_GRAFANA_USER_NAME};
ALTER DEFAULT PRIVILEGES IN SCHEMA public, wh_nagios GRANT SELECT ON TABLES TO ${PG_GRAFANA_USER_NAME};
GRANT USAGE ON SCHEMA public,wh_nagios TO ${PG_GRAFANA_USER_NAME};
GRANT EXECUTE ON FUNCTION wh_nagios.get_metric_data(bigint, timestamptz, timestamptz) TO ${PG_GRAFANA_USER_NAME};
GRANT USAGE ON SCHEMA pr_dalibo TO ${PG_GRAFANA_USER_NAME};
GRANT EXECUTE ON FUNCTION pr_dalibo.variation_taille_service(p_hostname text, p_service text,
p_label text, p_tstamp_debut timestamp with time zone, p_duree interval,
OUT v_compteur_debut numeric, OUT v_compteur_fin numeric,
OUT v_compteur_delta numeric, OUT v_type_corr text, OUT v_corr double precision,
OUT v_taille_un_mois numeric) TO ${PG_GRAFANA_USER_NAME};
GRANT EXECUTE ON FUNCTION pr_dalibo.pg_taille_jolie(p_valeur bigint, OUT v_jolie text) TO ${PG_GRAFANA_USER_NAME};
EOQ
}
# Run the provisioning steps in order.
#director ## Not needed anymore, kept for reference
for step in set_hostname packages icinga_setup icinga_API icinga_web grafana opm; do
    "$step"
done

# "icingacli setup" doesnt work when the icinga2 web setup is finished
# The setup token is displayed last, inside the closing banner.
info "#============================================================================="
info "# Icinga Web -- $(icingacli setup token show)"
info "#============================================================================="

exit 0