Suite implémentation
This commit is contained in:
parent
05e2bc6a05
commit
19204a03b6
|
@ -12,32 +12,40 @@ monitoringctl gives some control over NRPE checks and alerts.
|
||||||
|
|
||||||
Usage: monitoringctl [OPTIONS] ACTION ARGUMENTS
|
Usage: monitoringctl [OPTIONS] ACTION ARGUMENTS
|
||||||
|
|
||||||
OPTIONS:
|
GENERAL OPTIONS:
|
||||||
|
|
||||||
-h, --help Print this message and exit.
|
-h, --help Print this message and exit.
|
||||||
-v, --verbose Print more information.
|
-v, --verbose Print more information.
|
||||||
-f, --for DURATION Specify disable-alerts duration (default: 1h).
|
|
||||||
|
|
||||||
ACTIONS:
|
ACTIONS:
|
||||||
|
|
||||||
check CHECK_NAME
|
check [--bypass-nrpe] CHECK_NAME
|
||||||
|
|
||||||
Ask CHECK_NAME status to NRPE as an HTTP request (on 127.0.0.1:5666).
|
Ask CHECK_NAME status to NRPE as an HTTP request.
|
||||||
Also show command that NRPE has supposedly run.
|
Indicates which command NRPE has supposedly run (from its configuration).
|
||||||
|
|
||||||
|
Options:
|
||||||
|
|
||||||
|
-b, --bypass-nrpe Directly execute the command from the NRPE configuration,
|
||||||
|
without sending a request to NRPE.
|
||||||
|
|
||||||
alerts-status
|
alerts-status
|
||||||
|
|
||||||
Print :
|
Print :
|
||||||
- Whether alerts are enabled or not (silenced).
|
- Whether alerts are enabled or not (silenced).
|
||||||
- If alerts are disabled (silenced):
|
- If alerts are disabled (silenced):
|
||||||
- Comment.
|
- Comment.
|
||||||
- Time left before automatic re-enable.
|
- Time left before automatic re-enable.
|
||||||
|
|
||||||
disable-alerts [--for DURATION] 'COMMENT'
|
disable-alerts [--duration DURATION] 'COMMENT'
|
||||||
|
|
||||||
Disable (silence) all alerts (only global for now) for DURATION and write COMMENT into the log.
|
Disable (silence) all alerts (only global for now) for DURATION and write COMMENT into the log.
|
||||||
Checks output is still printed, so alerts history won't be lost.
|
Checks output is still printed, so alerts history won't be lost.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
|
||||||
|
-d, --duration DURATION Specify disable-alerts duration (default: 1h).
|
||||||
|
|
||||||
enable-alerts 'COMMENT'
|
enable-alerts 'COMMENT'
|
||||||
|
|
||||||
Re-enable all alerts (only global for now)
|
Re-enable all alerts (only global for now)
|
||||||
|
@ -66,11 +74,21 @@ function usage_error {
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function now {
|
||||||
|
date --iso-8601=seconds
|
||||||
|
}
|
||||||
|
|
||||||
### CHECK ACTION ##########################
|
function log {
|
||||||
|
# $1: message
|
||||||
|
echo "$(now) - $1" >> "${log_path}"
|
||||||
|
}
|
||||||
|
|
||||||
# Print NRPE configuration without comments and in the same order
|
|
||||||
# than Nagios (taking account that order changes from Deb10)
|
### FUNCTIONS FOR CONFIGURATION READING ##########################
|
||||||
|
|
||||||
|
# Print NRPE configuration, with includes, without comments
|
||||||
|
# and in the same order as NRPE does (taking into account that
|
||||||
|
# the order changes from Debian 10 onwards)
|
||||||
function get_conf_from_file {
|
function get_conf_from_file {
|
||||||
# $1: NRPE conf file (.cfg)
|
# $1: NRPE conf file (.cfg)
|
||||||
if [ ! -f "$1" ]; then return; fi
|
if [ ! -f "$1" ]; then return; fi
|
||||||
|
@ -89,6 +107,9 @@ function get_conf_from_file {
|
||||||
done <<< "${conf_lines}"
|
done <<< "${conf_lines}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Print NRPE configuration, with includes, without comments
|
||||||
|
# and in the same order as NRPE does (taking into account that
|
||||||
|
# the order changes from Debian 10 onwards)
|
||||||
function get_conf_from_dir {
|
function get_conf_from_dir {
|
||||||
# $1: NRPE conf dir
|
# $1: NRPE conf dir
|
||||||
if [ ! -d "$1" ]; then return; fi
|
if [ ! -d "$1" ]; then return; fi
|
||||||
|
@ -111,94 +132,61 @@ function get_conf_from_dir {
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Print the checks that are configured in NRPE
|
||||||
function grep_conf {
|
function get_checks_list {
|
||||||
# $1: check name (load, disk1…)
|
echo "${conf_lines}" | grep -E "command\[check_.*\]=" | awk -F"[\\\[\\\]=]" '{sub("check_", "", $2); print $2}' | sort | uniq
|
||||||
# $2: nrpe conf file (.cfg)
|
|
||||||
grep -E -R --no-filename "^\s*(include(_dir)?=.+|command\[check_$1\])" "$2" | grep -v -E '^[[:blank:]]*#'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Print check commands, in the same order as they are declared in the conf,
|
# Print the commands defined for check $1 in NRPE configuration
|
||||||
# with respect to the include and include_dir directives, which are
|
function get_check_commands {
|
||||||
# explored recursively.
|
# $1: check name
|
||||||
function get_config_file_checks {
|
echo "$conf_lines" | grep -E "command\[check_$1\]" | cut -d'=' -f2-
|
||||||
# $1: check name (load, disk1…)
|
|
||||||
# $2: nrpe conf file (.cfg)
|
|
||||||
conf_lines=$(grep_conf "$1" "$2")
|
|
||||||
while read -r line; do
|
|
||||||
if [[ "${line}" =~ .*"check_$1".* ]]; then
|
|
||||||
echo "${line}" | cut -d'=' -f2-
|
|
||||||
|
|
||||||
elif [[ "${line}" =~ .*'include='.* ]]; then
|
|
||||||
conf_file=$(echo "${line}" | cut -d= -f2)
|
|
||||||
get_config_file_checks "$1" "${conf_file}"
|
|
||||||
|
|
||||||
elif [[ "${line}" =~ .*'include_dir='.* ]]; then
|
|
||||||
conf_dir=$(echo "${line}" | cut -d= -f2)
|
|
||||||
get_config_dir_checks "$1" "${conf_dir}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
done <<< "${conf_lines}"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Same as get_config_file_checks, but for a recursive search in a directory.
|
|
||||||
function get_config_dir_checks {
|
### CHECK ACTION ##########################
|
||||||
# $1: check name (load, disk1…)
|
|
||||||
# $2: nrpe conf dir
|
|
||||||
if [ "${debian_major_version}" -ge 10 ]; then
|
|
||||||
# From Deb10, NRPE use scandir() with alphasort() function
|
|
||||||
sort_command="sort"
|
|
||||||
else
|
|
||||||
# Before Deb10, NRPE use loaddir(), like find utility
|
|
||||||
sort_command="cat -"
|
|
||||||
fi
|
|
||||||
# Add conf files in dir to be processed recursively
|
|
||||||
for file in $(find "$2" -maxdepth 1 -name "*.cfg" | ${sort_command}); do
|
|
||||||
if [ -f "${file}" ]; then
|
|
||||||
get_config_file_checks "$1" "${file}"
|
|
||||||
elif [ -d "${file}" ]; then
|
|
||||||
get_config_dir_checks "$1" "${file}"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
}
|
|
||||||
|
|
||||||
function check {
|
function check {
|
||||||
|
# $1: check name
|
||||||
|
|
||||||
check_nrpe_bin=/usr/lib/nagios/plugins/check_nrpe
|
check_nrpe_bin=/usr/lib/nagios/plugins/check_nrpe
|
||||||
debian_major_version=$(cut -d "." -f 1 < /etc/debian_version)
|
|
||||||
|
|
||||||
if [ ! -f "${check_nrpe_bin}" ]; then
|
if [ ! -f "${check_nrpe_bin}" ]; then
|
||||||
>&2 echo "${check_nrpe_bin} is missing, please install nagios-nrpe-plugin package."
|
>&2 echo "${check_nrpe_bin} is missing, please install nagios-nrpe-plugin package."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
conf_lines=$(get_conf_from_file "${conf_path}")
|
|
||||||
|
|
||||||
server_address=$(echo "$conf_lines" | grep "server_address" | cut -d'=' -f2)
|
server_address=$(echo "$conf_lines" | grep "server_address" | cut -d'=' -f2)
|
||||||
if [ -z "${server_address}" ]; then server_address="127.0.0.1"; fi
|
if [ -z "${server_address}" ]; then server_address="127.0.0.1"; fi
|
||||||
|
|
||||||
server_port=$(echo "$conf_lines" | grep "server_port" | cut -d'=' -f2)
|
server_port=$(echo "$conf_lines" | grep "server_port" | cut -d'=' -f2)
|
||||||
if [ -z "${server_port}" ]; then server_port="5666"; fi
|
if [ -z "${server_port}" ]; then server_port="5666"; fi
|
||||||
|
|
||||||
found_commands=$(echo "$conf_lines" | grep -E "command\[check_$1\]" | cut -d'=' -f2-)
|
check_commands=$(get_check_commands "$1")
|
||||||
|
|
||||||
if [ -n "${found_commands}" ]; then
|
|
||||||
|
|
||||||
|
if [ -n "${check_commands}" ]; then
|
||||||
if [ "${verbose}" == "True" ]; then
|
if [ "${verbose}" == "True" ]; then
|
||||||
echo "Available commands (in config order, the last one overwrites the others):"
|
echo "Available commands (in config order, the last one overwrites the others):"
|
||||||
echo "$found_commands"
|
echo "$check_commands"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
nrpe_command=$(echo "${found_commands}" | tail -n1)
|
check_command=$(echo "${check_commands}" | tail -n1)
|
||||||
|
|
||||||
echo "Command used by NRPE:"
|
echo "Command used by NRPE:"
|
||||||
echo " ${nrpe_command}"
|
echo " ${check_command}"
|
||||||
|
|
||||||
else
|
else
|
||||||
>&2 echo "No command found in NRPE configuration for this check:"
|
>&2 echo "Warning: no command found in NRPE configuration for check '${1}'."
|
||||||
>&2 echo " $1"
|
if [ "${bypass_nrpe}" = "True" ]; then
|
||||||
|
>&2 echo "Aborted."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
request_command="${check_nrpe_bin} -H ${server_address} -p ${server_port} -c check_$1 2&>1"
|
if [ "${bypass_nrpe}" = "False" ]; then
|
||||||
|
request_command="${check_nrpe_bin} -H ${server_address} -p ${server_port} -c check_$1 2&>1"
|
||||||
|
else
|
||||||
|
request_command="sudo -u nagios -- ${check_command}"
|
||||||
|
fi
|
||||||
|
|
||||||
if [ "${verbose}" == "True" ]; then
|
if [ "${verbose}" == "True" ]; then
|
||||||
echo "Executing:"
|
echo "Executing:"
|
||||||
|
@ -208,14 +196,18 @@ function check {
|
||||||
check_output=$(${request_command})
|
check_output=$(${request_command})
|
||||||
rc=$?
|
rc=$?
|
||||||
|
|
||||||
echo "NRPE service output (on ${server_address}:5666):"
|
if [ "${bypass_nrpe}" = "False" ]; then
|
||||||
|
echo "NRPE service output (on ${server_address}:${server_port}):"
|
||||||
|
else
|
||||||
|
echo "Direct check output (bypassing NRPE):"
|
||||||
|
fi
|
||||||
echo "${check_output}"
|
echo "${check_output}"
|
||||||
|
|
||||||
exit "${rc}"
|
exit "${rc}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
### (EN|DIS)ABLE-ALERT ACTION ##########################
|
### (EN|DIS)ABLE-ALERTS ACTIONS ##########################
|
||||||
|
|
||||||
function filter_duration {
|
function filter_duration {
|
||||||
# Format (in brief): XdYhZm
|
# Format (in brief): XdYhZm
|
||||||
|
@ -229,10 +221,27 @@ function filter_duration {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Check that NRPE commands are wrapped by alerts_wrapper script
|
||||||
|
function is_nrpe_wrapped {
|
||||||
|
for check in $(get_checks_list); do
|
||||||
|
command=$(get_check_commands "${check}" | tail -n1)
|
||||||
|
echo "${command}" | grep --quiet --no-messages alerts_wrapper
|
||||||
|
rc=$?
|
||||||
|
if [ "${rc}" -ne 0 ]; then
|
||||||
|
>&2 echo "Warning: check '${check}' has no alerts_wrapper, it will not be disabled:"
|
||||||
|
>&2 echo " ${command}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
function disable-alerts {
|
function disable-alerts {
|
||||||
|
# $1: comment
|
||||||
|
|
||||||
#TODO
|
if ! command -v alerts_switch &> /dev/null; then
|
||||||
|
>&2 echo "Error: script 'alerts_switch' is not installed."
|
||||||
|
>&2 echo "Aborted."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# TODO: Check not disabled yet
|
# TODO: Check not disabled yet
|
||||||
|
|
||||||
|
@ -242,8 +251,8 @@ function disable-alerts {
|
||||||
Hint: use --duration DURATION to change default time length."
|
Hint: use --duration DURATION to change default time length."
|
||||||
fi
|
fi
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Warning: alerts will be disabled for ${duration}${default_msg}
|
Alerts will be disabled for ${duration}${default_msg}
|
||||||
Check outputs will still be gathered by our monitoring system, so alerts history won't be lost.
|
Our monitoring system will continue to gather checks outputs, so alerts history won't be lost.
|
||||||
To re-enable alerts before ${duration}, execute (as root or with sudo):
|
To re-enable alerts before ${duration}, execute (as root or with sudo):
|
||||||
monitoringctl enable-alerts
|
monitoringctl enable-alerts
|
||||||
EOF
|
EOF
|
||||||
|
@ -251,7 +260,25 @@ EOF
|
||||||
|
|
||||||
read -r answer
|
read -r answer
|
||||||
if [ "$answer" = "Y" ] || [ "$answer" = "y" ]; then
|
if [ "$answer" = "Y" ] || [ "$answer" = "y" ]; then
|
||||||
#systemd-run --quiet --unit="" --on-calendar=""
|
log "Action disable-alerts requested for ${duration}: '${1}'"
|
||||||
|
for check in $(get_checks_list); do
|
||||||
|
# Log a warning if check has no wrapper
|
||||||
|
command=$(get_check_commands "${check}" | tail -n1)
|
||||||
|
echo "${command}" | grep --quiet --no-messages alerts_wrapper
|
||||||
|
rc=$?
|
||||||
|
if [ "${rc}" -ne 0 ]; then
|
||||||
|
log "Warning: check '${check}' has no alerts_wrapper, it will not be disabled."
|
||||||
|
fi
|
||||||
|
|
||||||
|
wrapper_names=$(get_check_commands "${check}" | tail -n1 | awk '{match($0, /.*--name\s+([^[:space:]]+)/, arr); print arr[1]}')
|
||||||
|
for name in $(echo "${wrapper_names=}" | tr ',' '\n'); do
|
||||||
|
echo "$(now) - Executing 'alerts_switch disable ${name}'" >> "${log_path}"
|
||||||
|
alerts_switch disable "${name}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
#TODO remove previous units if any
|
||||||
|
#TODO systemd-run --quiet --unit="" --on-calendar="" -- monitoringctl enable-alerts "[AUTO] ${}"
|
||||||
echo "Alerts are now disabled for ${duration}."
|
echo "Alerts are now disabled for ${duration}."
|
||||||
else
|
else
|
||||||
echo "Canceled."
|
echo "Canceled."
|
||||||
|
@ -260,19 +287,26 @@ EOF
|
||||||
exit 0
|
exit 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function enable-alerts {
|
function enable-alerts {
|
||||||
|
# $1: comment
|
||||||
|
|
||||||
#TODO
|
#TODO
|
||||||
|
|
||||||
echo "Alerts are re-enabled."
|
echo "Alerts are re-enabled (stub)."
|
||||||
#echo "Alerts were already enabled."
|
#echo "Alerts were already enabled."
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### ALERTS-STATUS ACTION ##########################
|
||||||
|
|
||||||
|
function alerts-status {
|
||||||
|
# TODO
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
### MAIN #########################################
|
### MAIN #########################################
|
||||||
|
|
||||||
# No root
|
# No root
|
||||||
|
@ -293,6 +327,7 @@ comment=""
|
||||||
verbose="False"
|
verbose="False"
|
||||||
duration="1h"
|
duration="1h"
|
||||||
default_duration="True"
|
default_duration="True"
|
||||||
|
bypass_nrpe="False"
|
||||||
|
|
||||||
# Parse arguments and options
|
# Parse arguments and options
|
||||||
while :; do
|
while :; do
|
||||||
|
@ -303,6 +338,9 @@ while :; do
|
||||||
-v|--verbose)
|
-v|--verbose)
|
||||||
verbose="True"
|
verbose="True"
|
||||||
shift;;
|
shift;;
|
||||||
|
-b|--bypass-nrpe)
|
||||||
|
bypass_nrpe="True"
|
||||||
|
shift;;
|
||||||
-d|--duration)
|
-d|--duration)
|
||||||
if [ "${default_duration}" = "False" ]; then
|
if [ "${default_duration}" = "False" ]; then
|
||||||
usage_error "Option --duration: defined multiple times."
|
usage_error "Option --duration: defined multiple times."
|
||||||
|
@ -322,6 +360,11 @@ while :; do
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
|
debian_major_version=$(cut -d "." -f 1 < /etc/debian_version)
|
||||||
|
conf_lines=$(get_conf_from_file "${conf_path}")
|
||||||
|
|
||||||
|
|
||||||
if [ -z "${action}" ]; then
|
if [ -z "${action}" ]; then
|
||||||
usage_error "Missing or invalid ACTION argument."
|
usage_error "Missing or invalid ACTION argument."
|
||||||
fi
|
fi
|
||||||
|
@ -364,7 +407,17 @@ if [ "${action}" = "disable-alerts" ]; then
|
||||||
usage_error "Action disable-alerts: too many arguments."
|
usage_error "Action disable-alerts: too many arguments."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
is_nrpe_wrapped
|
||||||
|
|
||||||
comment="$1"
|
comment="$1"
|
||||||
disable-alerts "${comment}"
|
disable-alerts "${comment}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "${action}" = "alerts-status" ]; then
|
||||||
|
if [ "$#" -gt 0 ]; then
|
||||||
|
usage_error "Action alerts-status: too many arguments."
|
||||||
|
fi
|
||||||
|
|
||||||
|
alerts-status
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue