#!/usr/bin/env bash
#
# monitoringctl: gives some control over NRPE checks and alerts
# (status, check, show, enable, disable).

#set -x

# Script name without its directory part. Declared and assigned separately
# so that the exit status of the command substitution is not masked.
PROGNAME="$(basename -- "${0}")"
readonly PROGNAME
readonly VERSION="24.04.00"

# ANSI color escape sequences, meant to be interpreted by 'echo -e'.
readonly red="\e[0;31m"
readonly green="\e[0;32m"
readonly orange="\e[0;33m"
readonly lightgreen="\e[1;32m"
readonly yellow="\e[1;33m"
readonly lightblue="\e[1;34m"
readonly purple="\e[0;35m"
readonly nocolor="\e[0m"

# Load common functions and vars
readonly lib_dir="/usr/local/lib/monitoringctl"
if [ -r "${lib_dir}/common" ]; then
    # shellcheck source=monitoringctl_common
    source "${lib_dir}/common"
else
    >&2 echo "Error: missing ${lib_dir}/common file."
    exit 1
fi

# Print the usage message on stdout.
# Globals read: VERSION, log_file (log_file is not defined in this file;
# presumably it comes from the sourced common library).
function show_help() {
    cat <<EOF
monitoringctl version ${VERSION}.

monitoringctl gives some control over NRPE checks and alerts.

Usage: monitoringctl [OPTIONS] ACTION ARGUMENTS

GENERAL OPTIONS:

  -h, --help            Print this message and exit.
  -V, --version         Print version number and exit.

ACTIONS:

  status [CHECK_NAME|all]

                        Print whether alerts are enabled or not (silenced).
                        If alerts are disabled (silenced), show disable message and time left before automatic re-enabling.

  check [--bypass-nrpe] CHECK_NAME

                        Ask CHECK_NAME status to NRPE as an HTTP request.
                        Indicates which command NRPE has supposedly run (from its configuration).
      -b, --bypass-nrpe Execute directly command from NRPE configuration,
                        as user nagios, without passing the request to NRPE.

  disable CHECK_NAME|all [--during DURATION] [--message 'DISABLE MESSAGE']

                        Disable (silence) CHECK_NAME or all alerts for DURATION and write DISABLE MESSAGE into the log.
                        Checks output is still printed, so alerts history won't be lost.

  enable CHECK_NAME|all [--message 'ENABLE MESSAGE']

                        Re-enable CHECK_NAME or all alerts

  show CHECK_NAME

                        Show NRPE command(s) configured for CHECK_NAME

MESSAGE:

  Message to be written in log and disabled check output (mandatory, will be asked dynamically if not provided).

DURATION:

  Time (string) during which alerts will be disabled (optional, default: "1h").

  Format:
    You can use 'd' (day), 'h' (hour) and 'm' (minute) , or a combination of them, to specify a duration.
    Examples: '2d', '1h', '10m', '1h10' ('m' is guessed).

NOTES

  For actions disable, enable and status, CHECK_NAME is actually the --name option passed to alerts_wrapper, and not the NRPE check name. Both check name and alerts_wrapper --name option should be equal in NRPE configuration to avoid confusion.

  Log path: ${log_file}

EOF
}

# Run one NRPE check, or every configured check, and print the result.
#
# Globals read: nrpe_conf_path, bypass_nrpe, green, orange, red, purple, nocolor
# Arguments:
#   $1: check name, "all" or empty (empty is handled like "all")
# Outputs:
#   all checks:   a table with check name, status and truncated output;
#   single check: the whole check output, split on '|' (one part per line).
# Exit status:
#   single check: exits the script with the return code of the check
#                 (3 when no command is configured for it);
#   exits 1 when the request must go through NRPE and check_nrpe is missing.
function check() {
    local -r check_nrpe_bin="/usr/lib/nagios/plugins/check_nrpe"
    # Maximum number of output characters displayed per check in the table.
    local -r max_output_len=80
    local all_checks="False"
    local conf_lines server_address server_port
    local checks check header underline str_out
    local err_msg check_commands check_command
    local check_output="" rc="3" rc_str="Unknown" color=""
    local -a request_cmd

    if [ -z "${1}" ] || [ "${1}" = "all" ]; then
        all_checks="True"
    fi

    # check_nrpe is only needed when the request actually goes through NRPE.
    if [ "${bypass_nrpe}" = "False" ] && [ ! -f "${check_nrpe_bin}" ]; then
        >&2 echo "${check_nrpe_bin} is missing, please install nagios-nrpe-plugin package."
        exit 1
    fi

    conf_lines="$(get_nrpe_conf "${nrpe_conf_path}")"

    # Last occurrence wins; surrounding blanks ("key = value") are removed
    # because the values are passed as single, quoted arguments below.
    server_address="$(echo "${conf_lines}" | grep "server_address" | tail -n1 | cut -d'=' -f2 | tr -d '[:space:]')"
    if [ -z "${server_address}" ]; then server_address="127.0.0.1"; fi

    server_port="$(echo "${conf_lines}" | grep "server_port" | tail -n1 | cut -d'=' -f2 | tr -d '[:space:]')"
    if [ -z "${server_port}" ]; then server_port="5666"; fi

    if [ "${all_checks}" = "True" ]; then
        checks="$(get_checks_names)"
        header="Check\tStatus\tOutput (truncated)"
        underline="-----\t------\t------------------"
        str_out="${header}\n${underline}\n"
    else
        checks="${1}"
        if [ "${bypass_nrpe}" = "False" ]; then
            str_out="NRPE service output (on ${server_address}:${server_port}):\n"
        else
            str_out="Direct check output (bypassing NRPE):\n"
        fi
    fi

    # Intentional word splitting: ${checks} is a whitespace-separated list.
    for check in ${checks}; do
        err_msg=""
        request_cmd=()
        if [ "${bypass_nrpe}" = "False" ]; then
            request_cmd=("${check_nrpe_bin}" -H "${server_address}" -p "${server_port}" -c "check_${check}")
        else
            check_commands="$(get_check_commands "${check}")"
            if [ -n "${check_commands}" ]; then
                # Several definitions may exist: the last one overwrites the others.
                check_command="$(echo "${check_commands}" | tail -n1)"
                # The configured command line is split on blanks, as NRPE
                # configuration stores it as a plain string.
                read -r -a request_cmd <<< "sudo -u nagios -- ${check_command}"
            elif [ "${all_checks}" = "True" ]; then
                err_msg="Check command not found in NRPE configuration."
            else
                err_msg="Error: no command found in NRPE configuration for check '${check}'. Aborted."
            fi
        fi

        if [ -z "${err_msg}" ]; then
            if [ "${bypass_nrpe}" = "False" ]; then
                # Errors reported by check_nrpe are part of the displayed output.
                check_output="$("${request_cmd[@]}" 2>&1)"
                rc="$?"
            else
                check_output="$("${request_cmd[@]}")"
                rc="$?"
            fi
            check_output="$(echo "${check_output}" | tr '\n' ' ')"
            if [ "${all_checks}" = "True" ] && [ "${#check_output}" -gt "${max_output_len}" ]; then
                check_output="${check_output:0:${max_output_len}} [...]"
            fi
        else
            check_output="${err_msg}"
            rc="3"
        fi

        # Nagios plugin return codes.
        case "${rc}" in
            0)
                rc_str="OK"
                color="${green}"
                ;;
            1)
                rc_str="Warning"
                color="${orange}"
                ;;
            2)
                rc_str="Critical"
                color="${red}"
                ;;
            *)
                rc_str="Unknown"
                color="${purple}"
                ;;
        esac

        if [ "${all_checks}" = "True" ]; then
            str_out="${str_out}${color}${check}\t${rc_str}\t${check_output}${nocolor}\n"
        fi
    done

    if [ "${all_checks}" = "True" ]; then
        echo -e "${str_out}" | column -t -s $'\t'
    else
        # '|' separates the human readable output from performance data.
        echo -e "${str_out}${color}${check_output}${nocolor}" | sed 's/|/\n/g'
        exit "${rc}"
    fi
}

# Make sure the 'alerts_switch' command is available.
# When it cannot be found, the problem is reported through error().
function alerts_switch_is_installed() {
    command -v alerts_switch > /dev/null 2>&1 \
        || error "Error: script 'alerts_switch' is not installed. Aborted."
}

# Disable (silence) the alerts of one check, or of all checks, after an
# interactive confirmation.
#
# Globals read: duration, default_duration, orange, nocolor
# Arguments:
#   $1: check name or "all"
#   $2: disable message (asked interactively when empty)
# Outputs:
#   a summary box and the confirmation prompt on stdout, warnings on stderr.
# Exit status:
#   exits 0 when the user does not confirm.
function disable_alerts() {
    local message="${2}"
    local checks wrapper answer
    local default_msg check_txt main_msg line
    local n_checks=0 other_checks="" name

    alerts_switch_is_installed

    if [ "${1}" = "all" ]; then
        checks="$(get_checks_names)"
    else
        checks="${1}"
    fi

    warn_not_wrapped "${checks}"
    warn_wrapper_names "${checks}"

    if [ -z "${message}" ]; then
        echo -n "> Please provide a disable message (for logging and check output): "
        read -r message
        echo ''
        if [ -z "${message}" ]; then
            error "Error: disable message is mandatory."
        fi
    fi

    default_msg=""
    if [ "${default_duration}" = "True" ]; then
        default_msg=" (use --during to change default time)"
    fi

    if [ "${1}" = "all" ]; then
        check_txt="All checks"
    else
        check_txt="Check ${1}"
    fi
    main_msg="${check_txt} will be disabled for ${duration}${default_msg}."

    # Horizontal border as wide as the message: pad to the message length
    # with spaces, then turn every space into a box-drawing character.
    printf -v line '%*s' "${#main_msg}" ''
    line="${line// /─}"

    cat <<EOF
┌${line}┐
│${main_msg}│
└${line}┘

Additional information:
  * Alerts history is kept in our monitoring system.
  * To re-enable alert(s) before ${duration}, execute as root or with sudo: 'monitoringctl enable ${1}'

EOF

    if [ "${1}" != "all" ]; then
        wrapper="$(get_check_wrapper_name "${1}")"
        checks="$(get_wrapper_checks "${wrapper}")"
        # Count the checks sharing this wrapper and list the other ones.
        # The comparison is an exact match: a check whose name merely
        # contains ${1} must still be reported.
        for name in ${checks}; do
            n_checks=$((n_checks + 1))
            if [ "${name}" != "${1}" ]; then
                other_checks="${other_checks:+${other_checks} }${name}"
            fi
        done
        if [ "${n_checks}" -gt 1 ]; then
            >&2 echo -e "${orange}Warning:${nocolor} because they have the same configuration, disabling ${1} will also disable: ${other_checks}.\n"
            log "Warning: check ${1} will also disable ${other_checks} (which have the same wrapper name)."
        fi
    else
        wrapper="all"
    fi

    echo -n "> Confirm (y/N)? "
    read -r answer
    if [ "${answer}" != "Y" ] && [ "${answer}" != "y" ]; then
        echo -e "${orange}Canceled.${nocolor}" && exit 0
    fi

    log "Action disable ${1} requested for ${duration} by user $(logname || echo unknown)."

    alerts_switch disable "${wrapper}" --during "${duration}" --message "${message}"

    if [ "${1}" != "all" ]; then
        if [ "${n_checks}" -eq 1 ]; then
            echo -e "${orange}Check ${1} alerts are now disabled for ${duration}.${nocolor}"
        else
            echo -e "${orange}Alerts are now disabled for ${duration} for checks: ${checks}.${nocolor}"
        fi
    else
        echo -e "${orange}All alerts are now disabled for ${duration}.${nocolor}"
    fi
}

# Re-enable the alerts of one check, or of all checks.
#
# Globals read: orange, green, nocolor
# Arguments:
#   $1: check name or "all"
#   $2: enable message (asked interactively when empty)
# Outputs:
#   progress messages on stdout, warnings on stderr.
# Exit status:
#   exits 0 when the given check is already enabled.
function enable_alerts() {
    local message="${2}"
    local disabled wrapper checks
    local n_checks=0 other_checks="" name

    alerts_switch_is_installed

    if [ "${1}" != "all" ]; then
        # Verify that check is not already enabled
        disabled="$(is_disabled "${1}")"
        if [ "${disabled}" = "False" ]; then
            echo "${1} is already enabled, see 'monitoringctl status'"
            exit 0
        fi
    fi

    if [ -z "${message}" ]; then
        echo -n "> Please provide an enable message (for logging): "
        read -r message
        echo ''
        if [ -z "${message}" ]; then
            error "Error: enable message is mandatory."
        fi
    fi

    log "Action enable ${1} requested by user $(logname || echo unknown)."

    if [ "${1}" != "all" ]; then
        wrapper="$(get_check_wrapper_name "${1}")"
        checks="$(get_wrapper_checks "${wrapper}")"
        # Count the checks sharing this wrapper and list the other ones.
        # The comparison is an exact match: a check whose name merely
        # contains ${1} must still be reported.
        for name in ${checks}; do
            n_checks=$((n_checks + 1))
            if [ "${name}" != "${1}" ]; then
                other_checks="${other_checks:+${other_checks} }${name}"
            fi
        done
        if [ "${n_checks}" -gt 1 ]; then
            >&2 echo -e "${orange}Warning:${nocolor} because they have the same configuration, enabling ${1} will also enable: ${other_checks}.\n"
            log "Warning: check ${1} will also enable ${other_checks} (which have the same wrapper name)."
        fi
    else
        wrapper="all"
    fi

    alerts_switch enable "${wrapper}" --message "${message}"

    if [ "${1}" != "all" ]; then
        if [ "${n_checks}" -eq 1 ]; then
            echo -e "${green}Check ${1} alerts are now enabled.${nocolor}"
        else
            echo -e "${green}Alerts are now enabled for checks: ${checks}.${nocolor}"
        fi
    else
        echo -e "${green}All alerts are now enabled.${nocolor}"
    fi
}

# Show NRPE command(s) configured for a check
#
# Arguments:
#   $1: check name
# Outputs:
#   every command found for the check, in configuration order, then the
#   one NRPE actually uses (the last one).
# A check without any configured command is reported through usage_error().
function show_check_commands() {
    local check_commands check_command command_line

    check_commands="$(get_check_commands "${1}")"

    if [ -z "${check_commands}" ]; then
        usage_error "Error: no command found in NRPE configuration for check '${1}'."
    fi

    echo "Available commands (in config order, the last one overwrites the others):"
    # Indent every command, not only the first line of the list.
    while IFS= read -r command_line; do
        printf '    %s\n' "${command_line}"
    done <<< "${check_commands}"

    check_command="$(tail -n1 <<< "${check_commands}")"
    echo "Command used by NRPE:"
    echo "    ${check_command}"
}

# Print a warning (on stderr) for every check whose wrapper name is
# different from the check name, followed by a general recommendation
# when at least one such check was found.
#
# Globals read: orange, nocolor
# Arguments:
#   $1: checks to verify (whitespace-separated list)
function warn_wrapper_names() {
    local warned="False"
    local check wrapper_name

    # Intentional word splitting: ${1} is a whitespace-separated list.
    for check in ${1}; do
        wrapper_name="$(get_check_wrapper_name "${check}")"
        if [ -n "${wrapper_name}" ] && [ "${wrapper_name}" != "${check}" ]; then
            >&2 echo -e "${orange}Warning:${nocolor} ${check} check has wrapper name ${wrapper_name}."
            warned="True"
        fi
    done

    if [ "${warned}" = "True" ]; then
        >&2 echo -e "${orange}It is recommended to name the wrappers the same as the checks.${nocolor}\n"
    fi
}

# Print a warning (on stderr) and write a log entry when some of the given
# checks are reported by not_wrapped_checks().
#
# Globals read: orange, nocolor
# Arguments:
#   $1: checks to verify (whitespace-separated list)
function warn_not_wrapped() {
    local unwrappeds candidate unwrapped
    local unwrapped_checks="" n_unwrapped=0

    unwrappeds="$(not_wrapped_checks)"

    # Intersection of both lists, kept in the order of ${1}. Done by hand
    # because neither list is known to be sorted or to hold one name per line.
    for candidate in ${1}; do
        for unwrapped in ${unwrappeds}; do
            if [ "${candidate}" = "${unwrapped}" ]; then
                unwrapped_checks="${unwrapped_checks:+${unwrapped_checks} }${candidate}"
                n_unwrapped=$((n_unwrapped + 1))
                break
            fi
        done
    done

    if [ -n "${unwrapped_checks}" ]; then
        if [ "${n_unwrapped}" -eq 1 ]; then
            >&2 echo -e "${orange}Warning:${nocolor} ${unwrapped_checks} check is not wrapped, it will not be disabled."
        else
            >&2 echo -e "${orange}Warning:${nocolor} some checks are not configured, they will not be disabled: ${unwrapped_checks}"
        fi

        log "Warning: some checks have no alerts_wrapper, they will not be disabled: ${unwrapped_checks}"
    fi
}

# Print a table telling, for each requested check, whether its alerts are
# enabled, disabled (with re-enable date, time left and disable message)
# or not configured at all.
function alerts_status() {
    # $1: check name, "all" or empty
    case "${1}" in
        ""|"all")
            checks="$(get_checks_names)"
            ;;
        *)
            checks="${1}"
            ;;
    esac

    warn_wrapper_names "${checks}"

    header="Check\tStatus\tRe-enable time\tDisable message"
    underline="-----\t------\t--------------\t---------------"
    str_out="${header}\n${underline}\n"

    for check in $checks; do
        enable_str=""
        disable_msg=""
        if ! is_wrapped "${check}"; then
            # No alerts_wrapper for this check: nothing else to look up.
            status_str="Not configured"
            color="${red}"
        else
            is_disabled="$(is_disabled "${check}")"
            wrapper_name="$(get_check_wrapper_name "${check}")"
            if [ "${is_disabled}" = "True" ]; then
                status_str="Disabled"
                color="${orange}"
                enable_time="$(get_enable_time "${wrapper_name}")"
                enable_delay="$(enable_delay "${enable_time}")"
                delay_str="$(delay_to_string "${enable_delay}")"
                enable_date="$(date --date "+${enable_delay} seconds" "+%d %h %Y at %H:%M:%S")"
                enable_str="${enable_date} (${delay_str} left)"
                disable_msg="$(get_disable_message "${wrapper_name}")"
            else
                status_str="Enabled"
                color="${green}"
            fi
        fi
        # The disable message is deliberately left outside the colored part.
        str_out+="${color}${check}\t${status_str}\t${enable_str}${nocolor}\t${disable_msg}\n"
    done

    echo -e "${str_out}" | column -t -s $'\t'
}

### MAIN #########################################

# No root
if [ "$(id -u)" -ne 0 ]; then
    >&2 echo "You need to be root (or use sudo) to run ${0}!"
    exit 1
fi

# No argument
if [ "$#" = "0" ]; then
    show_help
    exit 1
fi

# Default arguments and options
action=""
# Explicitly reset, so that a variable of the same name inherited from the
# environment can never be mistaken for the CHECK_NAME argument.
check_name=""
message=""
duration="${default_disabled_time}"
bypass_nrpe="False"
# Stays "True" as long as --during has not been given on the command line.
default_duration="True"

# Parse arguments and options
while :; do
    case "${1}" in
        -h|-\?|--help)
            show_help
            exit 0;;
        -V|--version)
            show_version
            exit 0;;
        -b|--bypass-nrpe)
            bypass_nrpe="True"
            shift;;
        -d|--during)
            if [ "${default_duration}" = "False" ]; then
                usage_error "Option --during: defined multiple times."
            fi
            if [ "$#" -lt 2 ]; then
                usage_error "Option --during: missing value."
            fi
            if filter_duration "${2}"; then
                duration="${2}"
            else
                usage_error "Option --during: \"${2}\" is not a valid duration."
            fi
            default_duration="False"
            shift; shift;;
        -m|--message)
            if [ "$#" -lt 2 ]; then
                usage_error "Option --message: missing message string."
            fi
            message="${2}"
            shift; shift;;
        status|check|enable|disable|show)
            action="${1}"
            shift;;
        *)
            # An empty word means there is nothing left to parse.
            if [ -z "${1}" ]; then
                break
            fi

            # Any other word is the CHECK_NAME argument of the action
            # given before it; what is accepted depends on that action.
            case "${action}" in
                status)
                    if is_check "${1}" || [ "${1}" = "all" ]; then
                        check_name="${1}"
                    else
                        usage_error "Action ${action}: unknown check '${1}'."
                    fi
                    ;;
                check|show)
                    if is_check "${1}"; then
                        check_name="${1}"
                    else
                        usage_error "Action ${action}: unknown check '${1}'."
                    fi
                    ;;
                enable|disable)
                    if is_wrapper "${1}" || is_check "${1}" || [ "${1}" = "all" ]; then
                        check_name="${1}"
                    else
                        # We use the word "check" for the end user,
                        # but this is actually "unknown wrapper"
                        usage_error "Action ${action}: unknown check '${1}'."
                    fi
                    ;;
                *)
                    usage_error "Missing or invalid ACTION argument."
                    ;;
            esac
            shift;;
    esac
done

# The loop above stops at the first empty word: anything after it is unexpected.
if [ "$#" -gt 0 ]; then
    usage_error "Too many arguments."
fi

case "${action}" in
    disable|enable|show)
        if [ -z "${check_name}" ]; then
            usage_error "Action ${action}: missing CHECK_NAME argument."
        fi
        ;;
esac

# --during only makes sense for the disable action.
if [ ! "${action}" = "disable" ]; then
    if [ "${default_duration}" = "False" ]; then
        usage_error "Action ${action}: there is no --during option."
    fi
fi

case "${action}" in
    status)
        alerts_status "${check_name}"
        ;;
    check)
        check "${check_name}"
        ;;
    show)
        show_check_commands "${check_name}"
        ;;
    enable)
        enable_alerts "${check_name}" "${message}"
        ;;
    disable)
        disable_alerts "${check_name}" "${message}"
        ;;
esac