Suite implémentation

This commit is contained in:
William Hirigoyen 2024-02-05 18:08:53 +01:00
parent 05e2bc6a05
commit 19204a03b6

View file

@ -12,32 +12,40 @@ monitoringctl gives some control over NRPE checks and alerts.
Usage: monitoringctl [OPTIONS] ACTION ARGUMENTS Usage: monitoringctl [OPTIONS] ACTION ARGUMENTS
OPTIONS: GENERAL OPTIONS:
-h, --help Print this message and exit. -h, --help Print this message and exit.
-v, --verbose Print more informations. -v, --verbose Print more informations.
-f, --for DURATION Specify disable-alerts duration (default: 1h).
ACTIONS: ACTIONS:
check CHECK_NAME check [--bypass-nrpe] CHECK_NAME
Ask CHECK_NAME status to NRPE as an HTTP request (on 127.0.0.1:5666). Ask CHECK_NAME status to NRPE as an HTTP request.
Also show command that NRPE has supposedly run. Indicates which command NRPE has supposedly run (from its configuration).
Options:
-b, --bypass-nrpe Execute directly command from NRPE configuration,
without requesting to NRPE.
alerts-status alerts-status
Print : Print :
- Whether alerts are enabled or not (silenced). - Whether alerts are enabled or not (silenced).
- If alerts are disabled (silenced): - If alerts are disabled (silenced):
- Comment. - Comment.
- Time left before automatic re-enable. - Time left before automatic re-enable.
disable-alerts [--for DURATION] 'COMMENT' disable-alerts [--duration DURATION] 'COMMENT'
Disable (silence) all alerts (only global for now) for DURATION and write COMMENT into the log. Disable (silence) all alerts (only global for now) for DURATION and write COMMENT into the log.
Checks output is still printed, so alerts history won't be lost. Checks output is still printed, so alerts history won't be lost.
Options:
-d, --duration DURATION Specify disable-alerts duration (default: 1h).
enable-alerts 'COMMENT' enable-alerts 'COMMENT'
Re-enable all alerts (only global for now) Re-enable all alerts (only global for now)
@ -66,11 +74,21 @@ function usage_error {
exit 1 exit 1
} }
# Print the current date and time on stdout as an ISO 8601 timestamp
# with seconds precision.
function now {
    date -Iseconds
}
### CHECK ACTION ########################## function log {
# $1: message
echo "$(now) - $1" >> "${log_path}"
}
# Print NRPE configuration without comments and in the same order
# than Nagios (taking account that order changes from Deb10) ### FUNCTIONS FOR CONFIGURATION READING ##########################
# Print NRPE configuration, with includes, without comments
# and in the same order as NRPE does (taking into account that
# the order changes from Deb10 onwards)
function get_conf_from_file { function get_conf_from_file {
# $1: NRPE conf file (.cfg) # $1: NRPE conf file (.cfg)
if [ ! -f "$1" ]; then return; fi if [ ! -f "$1" ]; then return; fi
@ -89,6 +107,9 @@ function get_conf_from_file {
done <<< "${conf_lines}" done <<< "${conf_lines}"
} }
# Print NRPE configuration from a directory, with includes, without
# comments and in the same order as NRPE does (taking into account
# that the order changes from Deb10 onwards)
function get_conf_from_dir { function get_conf_from_dir {
# $1: NRPE conf dir # $1: NRPE conf dir
if [ ! -d "$1" ]; then return; fi if [ ! -d "$1" ]; then return; fi
@ -111,94 +132,61 @@ function get_conf_from_dir {
done done
} }
# Print the checks that are configured in NRPE
function grep_conf { function get_checks_list {
# $1: check name (load, disk1…) echo "${conf_lines}" | grep -E "command\[check_.*\]=" | awk -F"[\\\[\\\]=]" '{sub("check_", "", $2); print $2}' | sort | uniq
# $2: nrpe conf file (.cfg)
grep -E -R --no-filename "^\s*(include(_dir)?=.+|command\[check_$1\])" "$2" | grep -v -E '^[[:blank:]]*#'
} }
# Print check commands, in the same order as they are declared in the conf, # Print the commands defined for check $1 in NRPE configuration
# with respect to the include and include_dir directives, which are function get_check_commands {
# explored recursively. # $1: check name
function get_config_file_checks { echo "$conf_lines" | grep -E "command\[check_$1\]" | cut -d'=' -f2-
# $1: check name (load, disk1…)
# $2: nrpe conf file (.cfg)
conf_lines=$(grep_conf "$1" "$2")
while read -r line; do
if [[ "${line}" =~ .*"check_$1".* ]]; then
echo "${line}" | cut -d'=' -f2-
elif [[ "${line}" =~ .*'include='.* ]]; then
conf_file=$(echo "${line}" | cut -d= -f2)
get_config_file_checks "$1" "${conf_file}"
elif [[ "${line}" =~ .*'include_dir='.* ]]; then
conf_dir=$(echo "${line}" | cut -d= -f2)
get_config_dir_checks "$1" "${conf_dir}"
fi
done <<< "${conf_lines}"
} }
# Same as get_config_file_checks, but for a recursive search in a directory.
function get_config_dir_checks { ### CHECK ACTION ##########################
# $1: check name (load, disk1…)
# $2: nrpe conf dir
if [ "${debian_major_version}" -ge 10 ]; then
# From Deb10, NRPE use scandir() with alphasort() function
sort_command="sort"
else
# Before Deb10, NRPE use loaddir(), like find utility
sort_command="cat -"
fi
# Add conf files in dir to be processed recursively
for file in $(find "$2" -maxdepth 1 -name "*.cfg" | ${sort_command}); do
if [ -f "${file}" ]; then
get_config_file_checks "$1" "${file}"
elif [ -d "${file}" ]; then
get_config_dir_checks "$1" "${file}"
fi
done
}
function check { function check {
# $1: check name
check_nrpe_bin=/usr/lib/nagios/plugins/check_nrpe check_nrpe_bin=/usr/lib/nagios/plugins/check_nrpe
debian_major_version=$(cut -d "." -f 1 < /etc/debian_version)
if [ ! -f "${check_nrpe_bin}" ]; then if [ ! -f "${check_nrpe_bin}" ]; then
>&2 echo "${check_nrpe_bin} is missing, please install nagios-nrpe-plugin package." >&2 echo "${check_nrpe_bin} is missing, please install nagios-nrpe-plugin package."
exit 1 exit 1
fi fi
conf_lines=$(get_conf_from_file "${conf_path}")
server_address=$(echo "$conf_lines" | grep "server_address" | cut -d'=' -f2) server_address=$(echo "$conf_lines" | grep "server_address" | cut -d'=' -f2)
if [ -z "${server_address}" ]; then server_address="127.0.0.1"; fi if [ -z "${server_address}" ]; then server_address="127.0.0.1"; fi
server_port=$(echo "$conf_lines" | grep "server_port" | cut -d'=' -f2) server_port=$(echo "$conf_lines" | grep "server_port" | cut -d'=' -f2)
if [ -z "${server_port}" ]; then server_port="5666"; fi if [ -z "${server_port}" ]; then server_port="5666"; fi
found_commands=$(echo "$conf_lines" | grep -E "command\[check_$1\]" | cut -d'=' -f2-) check_commands=$(get_check_commands "$1")
if [ -n "${found_commands}" ]; then
if [ -n "${check_commands}" ]; then
if [ "${verbose}" == "True" ]; then if [ "${verbose}" == "True" ]; then
echo "Available commands (in config order, the last one overwrites the others):" echo "Available commands (in config order, the last one overwrites the others):"
echo "$found_commands" echo "$check_commands"
fi fi
nrpe_command=$(echo "${found_commands}" | tail -n1) check_command=$(echo "${check_commands}" | tail -n1)
echo "Command used by NRPE:" echo "Command used by NRPE:"
echo " ${nrpe_command}" echo " ${check_command}"
else else
>&2 echo "No command found in NRPE configuration for this check:" >&2 echo "Warning: no command found in NRPE configuration for check '${1}'."
>&2 echo " $1" if [ "${bypass_nrpe}" = "True" ]; then
>&2 echo "Aborted."
exit 1
fi
fi fi
request_command="${check_nrpe_bin} -H ${server_address} -p ${server_port} -c check_$1 2&>1" if [ "${bypass_nrpe}" = "False" ]; then
request_command="${check_nrpe_bin} -H ${server_address} -p ${server_port} -c check_$1 2&>1"
else
request_command="sudo -u nagios -- ${check_command}"
fi
if [ "${verbose}" == "True" ]; then if [ "${verbose}" == "True" ]; then
echo "Executing:" echo "Executing:"
@ -208,14 +196,18 @@ function check {
check_output=$(${request_command}) check_output=$(${request_command})
rc=$? rc=$?
echo "NRPE service output (on ${server_address}:5666):" if [ "${bypass_nrpe}" = "False" ]; then
echo "NRPE service output (on ${server_address}:${server_port}):"
else
echo "Direct check output (bypassing NRPE):"
fi
echo "${check_output}" echo "${check_output}"
exit "${rc}" exit "${rc}"
} }
### (EN|DIS)ABLE-ALERT ACTION ########################## ### (EN|DIS)ABLE-ALERTS ACTIONS ##########################
function filter_duration { function filter_duration {
# Format (in brief): XdYhZm # Format (in brief): XdYhZm
@ -229,10 +221,27 @@ function filter_duration {
fi fi
} }
# Check that NRPE commands are wrapped by the alerts_wrapper script.
# For every check name printed by get_checks_list, the last command printed
# by get_check_commands is inspected; if it does not mention alerts_wrapper,
# a two-line warning (check name, then the command) is written to stderr.
# Nothing is written to stdout.
function is_nrpe_wrapped {
    # Loop state is local so that it cannot clobber same-named globals
    # (check, command, rc) used elsewhere in the script.
    local check_name effective_command

    # Read one check name per line instead of word-splitting the list,
    # so names are never subject to globbing.
    while IFS= read -r check_name; do
        [ -n "${check_name}" ] || continue
        # tail -n1 keeps only the last command declared for this check.
        effective_command=$(get_check_commands "${check_name}" | tail -n1)
        if ! grep --quiet --no-messages alerts_wrapper <<< "${effective_command}"; then
            >&2 echo "Warning: check '${check_name}' has no alerts_wrapper, it will not be disabled:"
            >&2 echo " ${effective_command}"
        fi
    done < <(get_checks_list)
}
function disable-alerts { function disable-alerts {
# $1: comment
#TODO if ! command -v alerts_switch &> /dev/null; then
>&2 echo "Error: script 'alerts_switch' is not installed."
>&2 echo "Aborted."
exit 1
fi
# TODO: Check not disabled yet # TODO: Check not disabled yet
@ -242,8 +251,8 @@ function disable-alerts {
Hint: use --duration DURATION to change default time length." Hint: use --duration DURATION to change default time length."
fi fi
cat <<EOF cat <<EOF
Warning: alerts will be disabled for ${duration}${default_msg} Alerts will be disabled for ${duration}${default_msg}
Check outputs will still be gathered by our monitoring system, so alerts history won't be lost. Our monitoring system will continue to gather checks outputs, so alerts history won't be lost.
To re-enable alerts before ${duration}, execute (as root or with sudo): To re-enable alerts before ${duration}, execute (as root or with sudo):
monitoringctl enable-alerts monitoringctl enable-alerts
EOF EOF
@ -251,7 +260,25 @@ EOF
read -r answer read -r answer
if [ "$answer" = "Y" ] || [ "$answer" = "y" ]; then if [ "$answer" = "Y" ] || [ "$answer" = "y" ]; then
#systemd-run --quiet --unit="" --on-calendar="" log "Action disable-alerts requested for ${duration}: '${1}'"
for check in $(get_checks_list); do
# Log a warning if check has no wrapper
command=$(get_check_commands "${check}" | tail -n1)
echo "${command}" | grep --quiet --no-messages alerts_wrapper
rc=$?
if [ "${rc}" -ne 0 ]; then
log "Warning: check '${check}' has no alerts_wrapper, it will not be disabled."
fi
wrapper_names=$(get_check_commands "${check}" | tail -n1 | awk '{match($0, /.*--name\s+([^[:space:]]+)/, arr); print arr[1]}')
for name in $(echo "${wrapper_names=}" | tr ',' '\n'); do
echo "$(now) - Executing 'alerts_switch disable ${name}'" >> "${log_path}"
alerts_switch disable "${name}"
done
done
#TODO remove previous units if any
#TODO systemd-run --quiet --unit="" --on-calendar="" -- monitoringctl enable-alerts "[AUTO] ${}"
echo "Alerts are now disabled for ${duration}." echo "Alerts are now disabled for ${duration}."
else else
echo "Canceled." echo "Canceled."
@ -260,19 +287,26 @@ EOF
exit 0 exit 0
} }
function enable-alerts { function enable-alerts {
# $1: comment
#TODO #TODO
echo "Alerts are re-enabled." echo "Alerts are re-enabled (stub)."
#echo "Alerts were already enabled." #echo "Alerts were already enabled."
exit 0 exit 0
} }
### ALERTS-STATUS ACTION ##########################
# Placeholder for the alerts-status action: not implemented yet, it
# prints nothing and succeeds.
function alerts-status {
    # TODO: implement
    :
}
### MAIN ######################################### ### MAIN #########################################
# No root # No root
@ -293,6 +327,7 @@ comment=""
verbose="False" verbose="False"
duration="1h" duration="1h"
default_duration="True" default_duration="True"
bypass_nrpe="False"
# Parse arguments and options # Parse arguments and options
while :; do while :; do
@ -303,6 +338,9 @@ while :; do
-v|--verbose) -v|--verbose)
verbose="True" verbose="True"
shift;; shift;;
-b|--bypass-nrpe)
bypass_nrpe="True"
shift;;
-d|--duration) -d|--duration)
if [ "${default_duration}" = "False" ]; then if [ "${default_duration}" = "False" ]; then
usage_error "Option --duration: defined multiple times." usage_error "Option --duration: defined multiple times."
@ -322,6 +360,11 @@ while :; do
esac esac
done done
debian_major_version=$(cut -d "." -f 1 < /etc/debian_version)
conf_lines=$(get_conf_from_file "${conf_path}")
if [ -z "${action}" ]; then if [ -z "${action}" ]; then
usage_error "Missing or invalid ACTION argument." usage_error "Missing or invalid ACTION argument."
fi fi
@ -364,7 +407,17 @@ if [ "${action}" = "disable-alerts" ]; then
usage_error "Action disable-alerts: too many arguments." usage_error "Action disable-alerts: too many arguments."
fi fi
is_nrpe_wrapped
comment="$1" comment="$1"
disable-alerts "${comment}" disable-alerts "${comment}"
fi fi
# Dispatch the alerts-status action; it accepts no positional argument.
if [ "${action}" = "alerts-status" ]; then
    [ "$#" -le 0 ] || usage_error "Action alerts-status: too many arguments."
    alerts-status
fi