nagios-nrpe: add check_sentinel
All checks were successful
Ansible Lint |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |2627|5|2622|6|:+1: Reference build: <a href="https://jenkins.evolix.org/job/gitea/job/ansible-roles/job/unstable/381//ansiblelint">Evolix » ansible-roles » unstable #381</a>
gitea/ansible-roles/pipeline/head This commit looks good

This commit is contained in:
Brice Waegeneire 2023-10-27 15:02:28 +02:00
parent 331f4e8875
commit 74a6b2ead1
3 changed files with 205 additions and 0 deletions

View file

@ -14,6 +14,7 @@ The **patch** part is incremented if multiple releases happen the same m
### Added
* Preliminary work for php83
* nagios-nrpe: add check_sentinel for monitoring Redis Sentinel
### Changed

View file

@ -0,0 +1,203 @@
#!/bin/sh
#
# Verify the health of Redis instances using Redis Sentinel.
#
# Usage: <this script> -c <sentinel config file>
#
# Example output:
# OK - 0 UNCHK / 0 CRIT / 0 WARN / 4 OK
#
# OK: Sentinels quorum reached / Active: 2 (quorum: 2)
# OK: '10.11.24.217:6379' is a 'master' of 'redis'
# OK: '10.11.24.227:6379' is a 'slave' of 'redis', in sync with '10.11.24.217'
# OK: '10.11.24.208:6379' is a 'slave' of 'redis', in sync with '10.11.24.217'
# Treat the expansion of an unset variable as an error instead of
# silently substituting an empty string.
set -u
# Print the command-line synopsis on stdout and terminate the script.
#
# Exits with status 3, which Nagios/NRPE interprets as UNKNOWN: a
# plugin called with bad arguments has not checked anything, so it must
# not be reported as a WARNING (status 1).
usage() {
    echo "Usage:"
    echo " $0 -c <sentinel config file>"
    exit 3
}
# Command-line parsing: the only supported option is
# "-c <sentinel config file>", which is mandatory.
sentinel_config_file=
while [ $# -gt 0 ]; do
    case $1 in
        -c)
            # "-c" given as the last word has no value: reading $2 would
            # abort under `set -u` and the double shift would fail, so
            # report the problem explicitly instead.
            if [ $# -lt 2 ]; then
                echo "Missing argument for option: $1"
                usage
            fi
            sentinel_config_file=$2
            shift 2
            ;;
        *)
            echo "Invalid option: $1"
            usage
            ;;
    esac
done
# The configuration file is required; usage() terminates the script.
[ -n "$sentinel_config_file" ] || usage
# NRPE specific
#
# exit_code holds the worst status recorded so far (0 OK, 1 WARNING,
# 2 CRITICAL, 3 UNCHK); the *_count variables count the result lines of
# each severity.
exit_code=0
ok_count=0
warn_count=0
crit_count=0
unchk_count=0
# Temporary file receiving one line per check result; nrpe_output
# prints it grouped by severity and removes it.
# The inner substitution is quoted so a script path containing blanks
# does not split the template, and a mktemp failure is reported right
# away instead of letting every later redirection fail.
if ! output=$(mktemp --tmpdir "$(basename "$0").XXXXXXXX"); then
    echo "UNCHK - Can't create temporary file"
    exit 3
fi
# Record a successful check.
#
# $1: text of the result line.
# Appends "OK: <text>" to the file named by $output and increments
# ok_count; the global exit status is left untouched.
ok() {
    printf 'OK: %s\n' "$1" >> "$output"
    ok_count=$((ok_count + 1))
}
# Record a check that ended in a warning.
#
# $1: text of the result line.
# Appends "WARN: <text>" to the file named by $output, increments
# warn_count and raises exit_code to 1 unless it is already higher.
# Always returns 0.
warn() {
    printf 'WARN: %s\n' "$1" >> "$output"
    warn_count=$((warn_count + 1))
    # A trailing `[ ... ] && ...` would make the function return 1
    # whenever the status is already WARNING or worse; `if` keeps the
    # return status at 0 on every normal path.
    if [ "$exit_code" -lt 1 ]; then
        exit_code=1
    fi
}
# Record a check that ended in a critical state.
#
# $1: text of the result line.
# Appends "CRIT: <text>" to the file named by $output, increments
# crit_count and raises exit_code to 2 unless it is already higher.
# Always returns 0.
crit() {
    printf 'CRIT: %s\n' "$1" >> "$output"
    crit_count=$((crit_count + 1))
    # A trailing `[ ... ] && ...` would make the function return 1
    # whenever the status is already CRITICAL or worse; `if` keeps the
    # return status at 0 on every normal path.
    if [ "$exit_code" -lt 2 ]; then
        exit_code=2
    fi
}
# Record a check that could not be performed and stop the script.
#
# $1: text of the result line.
# Appends "UNCHK: <text>" to the file named by $output, increments
# unchk_count, raises exit_code to 3 and exits.
unchk() {
    printf 'UNCHK: %s\n' "$1" >> "$output"
    unchk_count=$((unchk_count + 1))
    if [ "$exit_code" -lt 3 ]; then
        exit_code=3
    fi
    # Exit with the UNKNOWN status itself rather than 1 (WARNING), so
    # the result is right even when no EXIT trap rewrites the status.
    exit 3
}
# Print the final report and leave with the accumulated status.
#
# First a summary line ("<STATUS> - n UNCHK / n CRIT / n WARN / n OK")
# followed by an empty line, then the recorded result lines grouped by
# severity in the order CRIT, WARN, UNCHK, OK. The temporary file is
# removed and the script exits with $exit_code.
nrpe_output() {
    if [ "$exit_code" = 0 ]; then
        output_header="OK"
    elif [ "$exit_code" = 1 ]; then
        output_header="WARNING"
    elif [ "$exit_code" = 2 ]; then
        output_header="CRITICAL"
    else
        output_header="UNCHK"
    fi

    printf "%s - %s UNCHK / %s CRIT / %s WARN / %s OK\n\n" \
        "$output_header" "$unchk_count" "$crit_count" "$warn_count" "$ok_count"

    # Most severe results first.
    for severity in CRIT WARN UNCHK OK; do
        grep -E "^${severity}" "$output"
    done

    rm -f "$output"
    exit "$exit_code"
}
# Run nrpe_output whenever the script exits, so the report is printed
# and the exit status is set to $exit_code on every exit path taken
# from here on.
trap nrpe_output EXIT
# Redis specific
#
# Build the `_redis-cli` alias used by the checks below: redis-cli with
# the port and password read from the Sentinel configuration file.
test -r "$sentinel_config_file" || unchk "Can't read file '${sentinel_config_file}'"
command -v redis-cli 1>/dev/null || unchk "Can't find executable 'redis-cli'"

# Wrap $1 in single quotes, escaping embedded single quotes, so that it
# is read back as one literal word when the shell parses it again.
_shell_quote() {
    printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
}

redis_cli_args=''
sentinel_port=$(awk '/^port/{print $2}' "${sentinel_config_file}")
if [ -n "$sentinel_port" ]; then
    redis_cli_args="${redis_cli_args} -p $(_shell_quote "$sentinel_port")"
fi
sentinel_pass=$(awk '/^requirepass/{print $2}' "${sentinel_config_file}")
if [ -n "$sentinel_pass" ]; then
    # NOTE(review): the password is still visible in the process list;
    # recent redis-cli versions can read it from the REDISCLI_AUTH
    # environment variable instead - confirm the minimum version in use.
    redis_cli_args="${redis_cli_args} --pass $(_shell_quote "$sentinel_pass")"
fi
# The alias value is parsed as shell source each time it is expanded,
# hence the quoting above: an unquoted password containing `;`, `$`,
# `#`, `&` or quotes would otherwise be interpreted by the shell.
alias _redis-cli="redis-cli ${redis_cli_args}"
# List the names of all masters known by sentinel, one per line.
#
# The reply of "sentinel masters" alternates field names and values on
# separate lines: sed joins each pair into "field=value" and awk keeps
# the value of every "name" field.
redis_sentinel_masters() {
    _redis-cli sentinel masters \
        | sed -e 'N' -e 's/\n/=/' \
        | awk -F = '$1 == "name" { print $2 }'
}
# Verify redis sentinel master
#
# $1: name of the master as known by sentinel.
#
# Queries sentinel (through the `_redis-cli` alias) for that master and
# records two results: whether the number of known sentinels reaches
# the quorum, and whether the instance is flagged as a master that is
# not considered down. If sentinel does not return the expected
# fields, the check is recorded as UNCHK, which stops the script.
check_master() {
    master=$1
    # Start from a clean state on every call: values left over from a
    # previously checked master must never be reported for this one,
    # and empty (rather than unset) variables are safe under `set -u`.
    ip=
    port=
    flags=
    num_sentinels=
    quorum=

    # The reply alternates field names and values on separate lines;
    # sed joins each pair into a single "field=value" word.
    input=$(_redis-cli sentinel master "$master" | sed 'N;s/\n/=/')
    # Word splitting of $input is intended: one "field=value" per word.
    for line in $input; do
        case "$line" in
            ip=*) ip=${line#ip=} ;;
            port=*) port=${line#port=} ;;
            flags=*) flags=${line#flags=} ;;
            # This sentinel is not counted among the "other" ones.
            num-other-sentinels=*) num_sentinels=$(( ${line#num-other-sentinels=} + 1 )) ;;
            quorum=*) quorum=${line#quorum=} ;;
        esac
    done

    # An empty or partial reply (sentinel unreachable, authentication
    # failure, unknown master...) cannot be evaluated.
    if [ -z "$ip" ] || [ -z "$port" ] || [ -z "$flags" ] \
        || [ -z "$num_sentinels" ] || [ -z "$quorum" ]; then
        unchk "Can't get state of master '${master}' from sentinel"
    fi

    if [ "$num_sentinels" -ge "$quorum" ]; then
        ok "Sentinels quorum reached / Active: ${num_sentinels} (quorum: ${quorum})"
    else
        crit "No quorum of sentinels / Active: ${num_sentinels} (quorum: ${quorum})"
    fi

    case "$flags" in
        # Sentinel adds s_down / o_down to the flags of a master it
        # considers down; such a master must not be reported as OK.
        *s_down*|*o_down*)
            crit "'${ip}:${port}' is a '${flags}' of '${master}', flagged as down"
            ;;
        *master*)
            ok "'${ip}:${port}' is a '${flags}' of '${master}'"
            ;;
        *)
            crit "'${ip}:${port}' is not a 'master' of '${master}'"
            ;;
    esac

    unset ip port flags num_sentinels quorum
}
# Record the result for one slave.
#
# $1: name of the slave record
# $2: flags reported by sentinel
# $3: master link status; only the exact value "ok" is healthy
# $4: host of the master the slave replicates from
#
# Reads the global $master for the name of the master being checked.
check_slaves_output() {
    case "$3" in
        ok)
            ok "'${1}' is a '${2}' of '${master}', in sync with '${4}'"
            ;;
        *)
            crit "'${1}' is a '${2}' of '${master}', not in sync"
            ;;
    esac
}
# Verify redis slaves are in sync with their master
#
# $1: name of the master as known by sentinel.
#
# Queries sentinel (through the `_redis-cli` alias) for the slaves of
# that master and hands every slave record to check_slaves_output. A
# reply without any slave record is recorded as a single CRIT result.
check_slaves() {
    master=$1
    # The reply alternates field names and values on separate lines;
    # sed joins each pair into a single "field=value" word.
    input=$(_redis-cli sentinel slaves "$master" | sed 'N;s/\n/=/')
    name=
    flags=
    status=
    master_host=
    # Word splitting of $input is intended: one "field=value" per word.
    for line in $input; do
        case "$line" in
            # "name" is the field beginning a new record
            name=*)
                if [ -n "$name" ]; then
                    # output for all the slave records, except the very last one
                    check_slaves_output "$name" "$flags" "$status" "$master_host"
                    # Reset to empty rather than unset: a following
                    # record lacking one of these fields must not abort
                    # the script under `set -u`.
                    flags=
                    status=
                    master_host=
                fi
                name=${line#name=}
                ;;
            flags=*) flags=${line#flags=} ;;
            master-link-status=*) status=${line#master-link-status=} ;;
            master-host=*) master_host=${line#master-host=} ;;
        esac
    done
    if [ -n "$name" ]; then
        # output for the last slave record in $input
        check_slaves_output "$name" "$flags" "$status" "$master_host"
    else
        # No record at all: say so instead of reporting a slave with an
        # empty name and empty flags.
        crit "No slave found for '${master}'"
    fi
    unset name flags status master_host
}
# Main flow: sentinel must be running, and every master it knows is
# checked together with its slaves. Results are accumulated by
# ok/crit/unchk and reported by the EXIT trap.
if systemctl is-active --quiet redis-sentinel.service; then
    masters=$(redis_sentinel_masters)
    if [ -z "$masters" ]; then
        crit "No Redis master"
    else
        # Word splitting is intended: one master name per word.
        for master in $masters; do
            check_master "$master"
            check_slaves "$master"
        done
    fi
else
    unchk "'redis-sentinel' process isn't running"
fi

View file

@ -75,6 +75,7 @@ command[check_varnish]={{ nagios_plugins_directory }}/check_varnish_health -i 12
command[check_haproxy]=sudo {{ nagios_plugins_directory }}/check_haproxy_stats -s /run/haproxy/admin.sock -w 80 -c 90 --ignore-maint --ignore-nolb --ignore-drain
command[check_minifirewall]=sudo {{ nagios_plugins_directory }}/check_minifirewall
command[check_redis_instances]={{ nagios_plugins_directory }}/check_redis_instances
command[check_sentinel]=sudo {{ nagios_plugins_directory }}/check_sentinel -c /etc/redis/sentinel.conf
command[check_hpraid]={{ nagios_plugins_directory }}/check_hpraid
command[check_php-fpm]={{ nagios_plugins_directory }}/check_phpfpm_multi
command[check_php-fpm56]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php56/rootfs/etc/php5/fpm/pool.d/