ansible-roles/nagios-nrpe/files/monitoringctl
William Hirigoyen f268b4411f
All checks were successful
Ansible Lint |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |2696|0|2696|0|:zzz:
gitea/ansible-roles/pipeline/head This commit looks good
Version sant trop de bugs :) Gère si les noms des wrappers sont différents des checks, multiples, ou autres cas bizarres
2024-03-29 10:45:01 +01:00

564 lines
17 KiB
Bash
Executable file

#!/usr/bin/env bash
#set -x
readonly PROGNAME=$(basename $0)
readonly VERSION="24.04.00"
readonly red="\e[0;31m"
readonly green="\e[0;32m"
readonly orange="\e[0;33m"
readonly lightgreen="\e[1;32m"
readonly yellow="\e[1;33m"
readonly lightblue="\e[1;34m"
readonly purple="\e[0;35m"
readonly nocolor="\e[0m"
# Load common functions and vars
readonly lib_dir="/usr/local/lib/monitoringctl"
if [ -r "${lib_dir}/common" ]; then
# shellcheck source=monitoringctl_common
source "${lib_dir}/common"
else
>&2 echo "Error: missing ${lib_dir}/common file."
exit 1
fi
function show_help() {
cat <<EOF
monitoringctl version ${VERSION}.
monitoringctl gives some control over NRPE checks and alerts.
Usage: monitoringctl [OPTIONS] ACTION ARGUMENTS
GENERAL OPTIONS:
-h, --help Print this message and exit.
-V, --version Print version number and exit.
ACTIONS:
status [CHECK_NAME|all]
Print whether alerts are enabled or not (silenced).
If alerts are disabled (silenced), show disable message and time left before automatic re-enabling.
check [--bypass-nrpe] CHECK_NAME
Ask CHECK_NAME status to NRPE as an HTTP request.
Indicates which command NRPE has supposedly run (from its configuration).
-b, --bypass-nrpe Execute directly command from NRPE configuration,
as user nagios, without passing the request to NRPE.
disable CHECK_NAME|all [--during DURATION] [--message 'DISABLE MESSAGE']
Disable (silence) CHECK_NAME or all alerts for DURATION and write DISABLE MESSAGE into the log.
Checks output is still printed, so alerts history won't be lost.
enable CHECK_NAME|all [--message 'ENABLE MESSAGE']
Re-enable CHECK_NAME or all alerts
show CHECK_NAME
Show NPRE command(s) configured for CHECK_NAME
MESSAGE:
Message to be written in log and disabled check output (mandatory, will be asked dynamically if not provided).
DURATION:
Time (string) during which alerts will be disabled (optional, default: "1h").
Format:
You can use 'd' (day), 'h' (hour) and 'm' (minute) , or a combination of them, to specify a duration.
Examples: '2d', '1h', '10m', '1h10' ('m' is guessed).
NOTES
For actions disable, enable and status, CHECK_NAME is actually the --name option passed to alerts_wrapper, and not the NRPE check name. Both check name and alerts_wrapper --name option should be equal in NRPE configuration to avoid confusion.
Log path: ${log_file}
EOF
}
function check() {
# $1: check name, "all" or empty
readonly check_nrpe_bin="/usr/lib/nagios/plugins/check_nrpe"
if [ ! -f "${check_nrpe_bin}" ]; then
>&2 echo "${check_nrpe_bin} is missing, please install nagios-nrpe-plugin package."
exit 1
fi
conf_lines="$(get_nrpe_conf "${nrpe_conf_path}")"
server_address=$(echo "$conf_lines" | grep "server_address" | tail -n1 | cut -d'=' -f2)
if [ -z "${server_address}" ]; then server_address="127.0.0.1"; fi
server_port=$(echo "$conf_lines" | grep "server_port" | tail -n1 | cut -d'=' -f2)
if [ -z "${server_port}" ]; then server_port="5666"; fi
if [ -z "${1}" ] || [ "${1}" = "all" ]; then
checks="$(get_checks_names)"
header="Check\tStatus\tOutput (truncated)"
underline="-----\t------\t------------------"
str_out="${header}\n${underline}\n"
else
checks="${1}"
if [ "${bypass_nrpe}" = "False" ]; then
str_out="NRPE service output (on ${server_address}:${server_port}):\n"
else
str_out="Direct check output (bypassing NRPE):\n"
fi
fi
for check in $checks; do
err_msg=""
if [ "${bypass_nrpe}" = "False" ]; then
request_command="${check_nrpe_bin} -H ${server_address} -p ${server_port} -c check_${check} 2&>1"
else
check_commands="$(get_check_commands "${check}")"
if [ -n "${check_commands}" ]; then
check_command="$(echo "${check_commands}")"
request_command="sudo -u nagios -- ${check_command}"
else
if [ -z "${1}" ] || [ "${1}" = "all" ]; then
err_msg="Check command not found in NRPE configuration."
else
err_mgs="Error: no command found in NRPE configuration for check '${check}'. Aborted."
fi
fi
fi
if [ -z "${err_msg}" ]; then
check_output="$(${request_command})"
rc="$?"
check_output="$(echo "${check_output}" | tr '\n' ' ')"
if [ -z "${1}" ] || [ "${1}" = "all" ]; then
term_cols="$(tput cols)"
if [ "${#check_output}" -gt 60 ]; then
check_output="$(echo "${check_output}" | cut -c-80) [...]"
fi
fi
else
check_output="${err_msg}"
rc="3"
fi
case "${rc}" in
0)
rc_str="OK"
color="${green}"
;;
1)
rc_str="Warning"
color="${orange}"
;;
2)
rc_str="Critical"
color="${red}"
;;
3)
rc_str="Unknown"
color="${purple}"
;;
*)
rc_str="Unknown"
color="${purple}"
esac
if [ -z "${1}" ] || [ "${1}" = "all" ]; then
str_out="${str_out}${color}${check}\t${rc_str}\t${check_output}${nocolor}\n"
fi
done
if [ -z "${1}" ] || [ "${1}" = "all" ]; then
echo -e "${str_out}" | column -t -s $'\t'
else
echo -e "${str_out}${color}${check_output}${nocolor}" | sed 's/|/\n/g'
exit "${rc}"
fi
}
# Print error message and exit if not installed
function alerts_switch_is_installed() {
if ! command -v alerts_switch &> /dev/null; then
error "Error: script 'alerts_switch' is not installed. Aborted."
fi
}
function disable_alerts() {
# $1: check name, $2: disable message
alerts_switch_is_installed
if [ "${1}" = "all" ]; then
checks="$(get_checks_names)"
else
checks="${1}"
fi
warn_not_wrapped "${checks}"
warn_wrapper_names "${checks}"
if [ -z "${2}" ]; then
echo -n "> Please provide a disable message (for logging and check output): "
read -r message
echo ''
if [ -z "${message}" ]; then
error "Error: disable message is mandatory."
fi
else
message="${2}"
fi
default_msg=""
if [ "${default_duration}" = "True" ]; then
default_msg=" (use --during to change default time)"
fi
if [ "${1}" = "all" ]; then
check_txt="All checks"
else
check_txt="Check ${1}"
fi
main_msg="${check_txt} will be disabled for ${duration}${default_msg}."
main_msg_len="${#main_msg}"
line="$(printf '─%.0s' $(eval "echo {1.."${main_msg_len}"}"))"
cat <<EOF
┌${line}┐
│${main_msg}│
└${line}┘
Additional information:
* Alerts history is kept in our monitoring system.
* To re-enable alert(s) before ${duration}, execute as root or with sudo: 'monitoringctl enable ${1}'
EOF
if [ "${1}" != "all" ]; then
wrapper="$(get_check_wrapper_name "${1}")"
checks="$(get_wrapper_checks "${wrapper}")"
n_checks="$(echo "${checks}" | wc -w)"
if [ "${n_checks}" -gt 1 ]; then
other_checks="$(echo "${checks}" | xargs -n1 | grep -v "${1}" | xargs)"
>&2 echo -e "${orange}Warning:${nocolor} because they have the same configuration, disabling ${1} will also disable: ${other_checks}.\n"
log "Warning: check ${1} will also disable ${other_checks} (which have the same wrapper name)."
fi
else
wrapper="all"
fi
echo -n "> Confirm (y/N)? "
read -r answer
if [ "${answer}" != "Y" ] && [ "${answer}" != "y" ]; then
echo -e "${orange}Canceled.${nocolor}" && exit 0
fi
log "Action disable ${1} requested for ${duration} by user $(logname || echo unknown)."
alerts_switch disable "${wrapper}" --during "${duration}" --message "${message}"
if [ "${1}" != "all" ]; then
if [ "${n_checks}" -eq 1 ]; then
echo -e "${orange}Check ${1} alerts are now disabled for ${duration}.${nocolor}"
else
echo -e "${orange}Alerts are now disabled for ${duration} for checks: ${checks}.${nocolor}"
fi
else
echo -e "${orange}All alerts are now disabled for ${duration}.${nocolor}"
fi
}
function enable_alerts() {
# $1: check name, $2: enable message
alerts_switch_is_installed
if [ "${1}" != "all" ]; then
# Verify that check is not already enabled
is_disabled="$(is_disabled "${1}")"
if [ "${is_disabled}" = "False" ]; then
echo "${1} is already enabled, see 'monitoringctl status'"
exit 0
fi
fi
if [ -z "${2}" ]; then
echo -n "> Please provide an enable message (for logging): "
read -r message
echo ''
if [ -z "${message}" ]; then
error "Error: disable message is mandatory."
fi
else
message="${2}"
fi
log "Action enable ${1} requested by user $(logname || echo unknown)."
if [ "${1}" != "all" ]; then
wrapper="$(get_check_wrapper_name "${1}")"
checks="$(get_wrapper_checks "${wrapper}")"
n_checks="$(echo "${checks}" | wc -w)"
if [ "${n_checks}" -gt 1 ]; then
other_checks="$(echo "${checks}" | xargs -n1 | grep -v "${1}" | xargs)"
>&2 echo -e "${orange}Warning:${nocolor} because they have the same configuration, enabling ${1} will also enable: ${other_checks}.\n"
log "Warning: check ${1} will also enable ${other_checks} (which have the same wrapper name)."
fi
else
wrapper="all"
fi
alerts_switch enable "${wrapper}" --message "${message}"
if [ "${1}" != "all" ]; then
if [ "${n_checks}" -eq 1 ]; then
echo -e "${green}Check ${1} alerts are now enabled.${nocolor}"
else
echo -e "${green}Alerts are now enabled for checks: ${checks}.${nocolor}"
fi
else
echo -e "${green}All alerts are now enabled.${nocolor}"
fi
}
# Show NRPE command(s) configured for a check
function show_check_commands() {
# $1: check name
check_commands=$(get_check_commands "${1}")
if [ -z "${check_commands}" ]; then
usage_error "Error: no command found in NRPE configuration for check '${1}."
fi
echo "Available commands (in config order, the last one overwrites the others):"
echo " $check_commands"
check_command=$(echo "${check_commands}" | tail -n1)
echo "Command used by NRPE:"
echo " ${check_command}"
}
# Print a warning if some wrappers have the same name
# or if a name is different from the check.
function warn_wrapper_names() {
#$1: checks to verify
warned="False"
for check in ${1}; do
wrapper_name="$(get_check_wrapper_name "${check}")"
if [ -n "${wrapper_name}" ] && [ "${wrapper_name}" != "${check}" ]; then
>&2 echo -e "${orange}Warning:${nocolor} ${check} check has wrapper name ${wrapper_name}."
warned="True"
fi
done
if [ "${warned}" = "True" ]; then
>&2 echo -e "${orange}It is recommanded to name the wrappers the same as the checks.${nocolor}\n"
fi
}
# Print a warning if some checks are not wrapped
function warn_not_wrapped() {
#$1: checks to verify
unwrappeds="$(not_wrapped_checks)"
unwrapped_checks="$(comm -12 <(echo "${1}") <(echo "${unwrappeds}"))"
if [ -n "${unwrapped_checks}" ]; then
n_unwrapped="$(echo "${unwrapped_checks}" | wc -w)"
if [ "${n_unwrapped}" -eq 1 ]; then
>&2 echo -e "${orange}Warning:${nocolor} ${unwrapped_checks} check is not wrapped, it will not be disabled."
else
>&2 echo -e -n "${orange}Warning:${nocolor} some checks are not configured, they will not be disabled:"
for unwrapped in ${unwrapped_checks}; do
>&2 echo -e -n " %s" "${unwrapped}"
done
>&2 echo -e -n "\n"
fi
log "Warning: some checks have no alerts_wrapper, they will not be disabled: $(echo "${unwrapped_checks}" | xargs)"
fi
}
# Echo which checks are enabled or disabled and time left
function alerts_status() {
# $1: check name, "all" or empty
if [ -z "${1}" ] || [ "${1}" = "all" ]; then
checks="$(get_checks_names)"
else
checks="${1}"
fi
warn_wrapper_names "${checks}"
header="Check\tStatus\tRe-enable time\tDisable message"
underline="-----\t------\t--------------\t---------------"
str_out="${header}\n${underline}\n"
for check in $checks; do
enable_str=""
status_str="Enabled"
disable_msg=""
if ! is_wrapped "${check}"; then
status_str="Not configured"
else
is_disabled="$(is_disabled "${check}")"
wrapper_name="$(get_check_wrapper_name "${check}")"
if [ "${is_disabled}" = "True" ]; then
status_str="Disabled"
enable_time="$(get_enable_time "${wrapper_name}")"
enable_delay="$(enable_delay "${enable_time}")"
delay_str="$(delay_to_string "${enable_delay}")"
enable_date="$(date --date "+${enable_delay} seconds" "+%d %h %Y at %H:%M:%S")"
enable_str="${enable_date} (${delay_str} left)"
disable_msg="$(get_disable_message "${wrapper_name}")"
fi
fi
case "${status_str}" in
"Enabled")
color="${green}"
;;
"Disabled")
color="${orange}"
;;
*)
color="${red}"
esac
str_out="${str_out}${color}${check}\t${status_str}\t${enable_str}${nocolor}\t${disable_msg}\n"
done
echo -e "${str_out}" | column -t -s $'\t'
}
### MAIN #########################################
# No root
if [ "$(id -u)" -ne 0 ]; then
>&2 echo "You need to be root (or use sudo) to run ${0}!"
exit 1
fi
# No argument
if [ "$#" = "0" ]; then
show_help
exit 1
fi
# Default arguments and options
action=""
message=""
duration="${default_disabled_time}"
bypass_nrpe="False"
default_duration="True"
# Parse arguments and options
while :; do
case "${1}" in
-h|-\?|--help)
show_help
exit 0;;
-V|--version)
show_version
exit 0;;
-b|--bypass-nrpe)
bypass_nrpe="True"
shift;;
-d|--during)
if [ "${default_duration}" = "False" ]; then
usage_error "Option --during: defined multiple times."
fi
if [ "$#" -lt 2 ]; then
usage_error "Option --during: missing value."
fi
if filter_duration "${2}"; then
duration="${2}"
else
usage_error "Option --during: \"${2}\" is not a valid duration."
fi
default_duration="False"
shift; shift;;
-m|--message)
if [ "$#" -lt 2 ]; then
usage_error "Option --message: missing message string."
fi
message="${2}"
shift; shift;;
status|check|enable|disable|show)
action="${1}"
shift;;
*)
if [ -z "${1}" ]; then
break
fi
case "${action}" in
status)
if is_check "${1}" || [ "${1}" = "all" ]; then
check_name="${1}"
else
usage_error "Action ${action}: unknown check '${1}'."
fi
;;
check|show)
if is_check "${1}"; then
check_name="${1}"
else
usage_error "Action ${action}: unknown check '${1}'."
fi
;;
enable|disable)
if is_wrapper "${1}" || is_check "${1}" || [ "${1}" = "all" ]; then
check_name="${1}"
else
# We use the word "check" for the end user,
# but this is actually "unknown wrapper"
usage_error "Action ${action}: unknown check '${1}'."
fi
;;
*)
usage_error "Missing or invalid ACTION argument."
;;
esac
shift;;
esac
done
if [ "$#" -gt 0 ]; then
usage_error "Too many arguments."
fi
case "${action}" in
disable|enable|show)
if [ -z "${check_name}" ]; then
usage_error "Action ${action}: missing CHECK_NAME argument."
fi
;;
esac
if [ ! "${action}" = "disable" ]; then
if [ "${default_duration}" = "False" ]; then
usage_error "Action ${action}: there is no --during option."
fi
fi
case "${action}" in
status)
alerts_status "${check_name}"
;;
check)
check "${check_name}"
;;
show)
show_check_commands "${check_name}"
;;
enable)
enable_alerts "${check_name}" "${message}"
;;
disable)
disable_alerts "${check_name}" "${message}"
;;
esac