commit tmp (alerts_wrapper)

This commit is contained in:
William Hirigoyen 2024-02-06 14:20:41 +01:00
parent 19204a03b6
commit 6a051b4672
3 changed files with 86 additions and 75 deletions

View file

@ -12,7 +12,10 @@ readonly PROGDIR=$(readlink -m $(dirname $0))
readonly ARGS="$@"
usage() {
echo "$PROGNAME action prefix"
echo "$PROGNAME disables or enables NRPE alerts wrapped by the script 'alerts_wrapper' in NRPE configuration."
echo "Usage: $PROGNAME enable|disable <NAME>"
echo " $PROGNAME help"
echo "NAME: one of the names given to '--names' option of 'alerts_wrapper'."
}
disable_alerts () {
@ -44,11 +47,11 @@ now () {
}
log_disable () {
echo "$(now) - alerts disabled by $(logname || echo unknown)" >> $1
echo "$(now) - alerts_switch: alerts disabled by $(logname || echo unknown)" >> $1
}
log_enable () {
echo "$(now) - alerts enabled by $(logname || echo unknown)" >> $1
echo "$(now) - alerts_switch: alerts enabled by $(logname || echo unknown)" >> $1
}
main () {
@ -59,7 +62,7 @@ main () {
mkdir -p "${base_dir}"
local file_path="${base_dir}/${prefix}_alerts"
local log_file="/var/log/${prefix}_alerts.log"
local log_file="/var/log/monitoringctl.log"
case "$action" in
enable)
@ -75,6 +78,7 @@ main () {
;;
*)
>&2 echo "Unknown action '$action'"
usage
exit 1
;;
esac

View file

@ -25,17 +25,17 @@ END
}
show_help() {
cat <<END
alerts_wrapper is supposed to wrap an NRPE command and overrides the return code.
alerts_wrapper wraps an NRPE command and overrides the return code.
Usage: alerts_wrapper --limit=1d --name=check_name command with optional arguments
or alerts_wrapper --name=check_name command with optional arguments
or alerts_wrapper check_name command with optional arguments
Usage: alerts_wrapper [--limit 1d] --names check_name[,other_disable_names,...] <check command with optional arguments>
Usage: alerts_wrapper disable_name <check command with optional arguments>
Options
--limit max age of the "check file" ;
can be "1d" for 1 day, "5m" for 5 minutes…
or more complex expressions like "1w2d10m42s"
--name check name
--names disable name (should contain at least the check name)
--name (deprecated) disable name (kept for backward compatibility)
-h, --help print this message and exit
-V, --version print version and exit
END
@ -52,18 +52,20 @@ time_in_seconds() {
}
delay_from_alerts_disabled_file() {
last_change=$(stat -c %Z "${alerts_disabled_file}")
limit_seconds=$(time_in_seconds "${wrapper_limit}" || time_in_seconds "${wrapper_limit_default}")
# $1: disabled file
last_change=$(stat -c %Z "$1")
limit_seconds=$(time_in_seconds "${wrapper_limit}" || time_in_seconds "${wrapper_limit_default}")
limit_date=$(date --date "${limit_seconds} seconds ago" +"%s")
echo $(( last_change - limit_date ))
}
enable_check() {
enable_checks() {
# $1: disable name
if [ "$(id -u)" -eq "0" ] ; then
/usr/local/bin/alerts_switch enable "${check_name}"
/usr/local/bin/alerts_switch enable "$1"
else
sudo /usr/local/bin/alerts_switch enable "${check_name}"
sudo /usr/local/bin/alerts_switch enable "$1"
fi
}
@ -74,19 +76,24 @@ main() {
delay=0
if [ -e "${alerts_disabled_file}" ]; then
delay=$(delay_from_alerts_disabled_file)
for disable_name in ${disable_names}; do
alerts_disabled_file="/var/lib/misc/${disable_name}_alerts_disabled"
if [ -e "${alerts_disabled_file}" ]; then
delay=$(delay_from_alerts_disabled_file "${alerts_disabled_file}")
if [ "${delay}" -le "0" ]; then
enable_check
if [ "${delay}" -le "0" ]; then
enable_checks "${disable_name}"
fi
fi
fi
if [ -e "${alerts_disabled_file}" ]; then
formatted_last_change=$(date --date "@$(stat -c %Z "${alerts_disabled_file}")" +'%c')
readonly formatted_last_change
if [ -e "${alerts_disabled_file}" ]; then
formatted_last_change=$(date --date "@$(stat -c %Z "${alerts_disabled_file}")" +'%c')
readonly formatted_last_change
echo "ALERTS DISABLED for ${disable_names} (since ${formatted_last_change}, delay: ${delay} sec) - $(cat "${check_stdout}")"
fi
done
echo "ALERTS DISABLED for ${check_name} (since ${formatted_last_change}, delay: ${delay} sec) - $(cat "${check_stdout}")"
if [ ${check_rc} = 0 ]; then
# Nagios OK
exit 0
@ -138,21 +145,21 @@ if [[ "${1}" =~ -.* ]]; then
exit 1
;;
--name)
--name|--names)
# with value separated by space
if [ -n "$2" ]; then
check_name=$2
disable_names=$2
shift
else
printf 'ERROR: "--name" requires a non-empty option argument.\n' >&2
exit 1
fi
;;
--name=?*)
--name=?*|--names=?*)
# with value separated by =
check_name=${1#*=}
disable_names=${1#*=}
;;
--name=)
--name=|--names=)
# without value
printf 'ERROR: "--name" requires a non-empty option argument.\n' >&2
exit 1
@ -180,8 +187,8 @@ if [[ "${1}" =~ -.* ]]; then
check_command="$*"
else
# no option is passed (backward compatibility with previous version)
# treat the first argument as check_name and the rest as the command
check_name="${1}"
# treat the first argument as disable_names and the rest as the command
disable_names="${1}"
shift
check_command="$*"
fi
@ -190,8 +197,8 @@ fi
if [ -z "${wrapper_limit}" ]; then
wrapper_limit="${wrapper_limit_default}"
fi
if [ -z "${check_name}" ]; then
printf 'ERROR: You must specify a check name, with --name.\n' >&2
if [ -z "${disable_names}" ]; then
printf 'ERROR: You must specify a check name, with --names.\n' >&2
exit 1
fi
if [ -z "${check_command}" ]; then
@ -199,16 +206,12 @@ if [ -z "${check_command}" ]; then
exit 1
fi
readonly check_name
disable_names="all $(echo "${disable_names}" | tr ',' ' ')"
readonly disable_names
readonly check_command
readonly wrapper_limit
alerts_disabled_file="/var/lib/misc/${check_name}_alerts_disabled"
readonly alerts_disabled_file
check_file="/var/lib/misc/${check_name}_alerts_disabled"
readonly check_file
check_stdout=$(mktemp --tmpdir=/tmp "${check_name}_stdout.XXXX")
check_stdout=$(mktemp --tmpdir=/tmp "${disable_names}_stdout.XXXX")
readonly check_stdout
# shellcheck disable=SC2064

View file

@ -243,7 +243,10 @@ function disable-alerts {
exit 1
fi
# TODO: Check not disabled yet
# Are alerts already disabled ?
if [ -f /var/lib/misc/all_alerts_disabled ]; then
fi
default_msg="."
if [ "${default_duration}" = "True" ]; then
@ -256,46 +259,50 @@ Our monitoring system will continue to gather checks outputs, so alerts history
To re-enable alerts before ${duration}, execute (as root or with sudo):
monitoringctl enable-alerts
EOF
echo -n "Confirm (y/N)? "
echo -n "Confirm (y/N)? "
read -r answer
if [ "$answer" = "Y" ] || [ "$answer" = "y" ]; then
log "Action disable-alerts requested for ${duration}: '${1}'"
for check in $(get_checks_list); do
# Log a warning if check has no wrapper
command=$(get_check_commands "${check}" | tail -n1)
echo "${command}" | grep --quiet --no-messages alerts_wrapper
rc=$?
if [ "${rc}" -ne 0 ]; then
log "Warning: check '${check}' has no alerts_wrapper, it will not be disabled."
fi
wrapper_names=$(get_check_commands "${check}" | tail -n1 | awk '{match($0, /.*--name\s+([^[:space:]]+)/, arr); print arr[1]}')
for name in $(echo "${wrapper_names=}" | tr ',' '\n'); do
echo "$(now) - Executing 'alerts_switch disable ${name}'" >> "${log_path}"
alerts_switch disable "${name}"
done
done
#TODO remove previous units if any
#TODO systemd-run --quiet --unit="" --on-calendar="" -- monitoringctl enable-alerts "[AUTO] ${}"
echo "Alerts are now disabled for ${duration}."
else
if [ "$answer" != "Y" ] && [ "$answer" != "y" ]; then
echo "Canceled."
exit 0
fi
exit 0
log "Action disable-alerts requested for ${duration} by user $(logname || echo unknown): '$1'"
# Log a warning if a check has no wrapper
for check in $(get_checks_list); do
command=$(get_check_commands "${check}" | tail -n1)
if ! echo "${command}" | grep --quiet --no-messages alerts_wrapper; then
log "Warning: check '${check}' has no alerts_wrapper, it will not be disabled."
fi
done
#wrapper_names=$(get_check_commands "${check}" | tail -n1 | awk '{match($0, /.*--name\s+([^[:space:]]+)/, arr); print arr[1]}')
#for name in $(echo "${wrapper_names=}" | tr ',' '\n'); do
# log "Executing 'alerts_switch disable ${name}'"
# alerts_switch disable "${name}"
#done
#done
log "Executing 'alerts_switch disable all'"
alerts_switch disable all
#TODO remove previous units if any
#TODO systemd-run --quiet --unit="" --on-calendar="" -- monitoringctl enable-alerts "[AUTO] $1"
echo "Alerts are now disabled for ${duration}."
}
function enable-alerts {
# $1: comment
#TODO
log "Action enable-alerts requested by user $(logname || echo unknown): '${1}'"
log "Executing 'alerts_switch enable all'"
echo "Alerts are re-enabled (stub)."
#echo "Alerts were already enabled."
alerts_switch enable all
exit 0
echo "Alerts are now re-enabled (stub)."
#TODO or: echo "Alerts were already enabled."
}
@ -382,9 +389,8 @@ if [ "${action}" = "check" ]; then
check_name="$1"
check "$check_name"
fi
if [ "${action}" = "enable-alerts" ]; then
elif [ "${action}" = "enable-alerts" ]; then
if [ "$#" = 0 ]; then
usage_error "Action enable-alerts: missing COMMENT argument."
fi
@ -397,9 +403,8 @@ if [ "${action}" = "enable-alerts" ]; then
comment="$1"
enable-alerts "${comment}"
fi
if [ "${action}" = "disable-alerts" ]; then
elif [ "${action}" = "disable-alerts" ]; then
if [ "$#" = 0 ]; then
usage_error "Action disable-alerts: missing COMMENT argument."
fi
@ -411,9 +416,8 @@ if [ "${action}" = "disable-alerts" ]; then
comment="$1"
disable-alerts "${comment}"
fi
if [ "${action}" = "alerts-status" ]; then
elif [ "${action}" = "alerts-status" ]; then
if [ "$#" -gt 0 ]; then
usage_error "Action alerts-status: too many arguments."
fi