diff --git a/CHANGELOG.md b/CHANGELOG.md index ab417bea..21ee1ef8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The **patch** part is incremented if multiple releases happen the same month * lxc: new lxc_template_mirror option (useful to get old Debian from archive.debian.org) * nagios-nrpe: add new check_ftp_users * proftpd: add new munin graph (users count) +* nagios-nrpe: new monitoringctl command ### Changed diff --git a/fluentd/tasks/main.yml b/fluentd/tasks/main.yml index de76320c..df645dcb 100644 --- a/fluentd/tasks/main.yml +++ b/fluentd/tasks/main.yml @@ -75,7 +75,7 @@ - name: NRPE check is configured ansible.builtin.lineinfile: path: /etc/nagios/nrpe.d/evolix.cfg - line: 'command[check_fluentd]=/usr/lib/nagios/plugins/check_tcp -p {{ fluentd_port }}' + line: 'command[check_fluentd]=/usr/local/lib/monitoringctl/alerts_wrapper --name fluentd /usr/lib/nagios/plugins/check_tcp -p {{ fluentd_port }}' notify: "restart nagios-nrpe-server" tags: - fluentd diff --git a/keepalived/tasks/main.yml b/keepalived/tasks/main.yml index 3f436d0e..33602ffd 100644 --- a/keepalived/tasks/main.yml +++ b/keepalived/tasks/main.yml @@ -36,7 +36,7 @@ ansible.builtin.lineinfile: dest: /etc/nagios/nrpe.d/evolix.cfg regexp: 'command\[check_keepalived\]' - replace: 'command[check_keepalived]=/usr/local/lib/nagios/plugins/check_keepalived' + line: 'command[check_keepalived]=/usr/local/lib/monitoringctl/alerts_wrapper --name keepalived /usr/local/lib/nagios/plugins/check_keepalived' notify: restart nagios-nrpe-server tags: - keepalived diff --git a/memcached/tasks/nrpe.yml b/memcached/tasks/nrpe.yml index 70c69c00..800c31fb 100644 --- a/memcached/tasks/nrpe.yml +++ b/memcached/tasks/nrpe.yml @@ -34,7 +34,7 @@ ansible.builtin.lineinfile: name: /etc/nagios/nrpe.d/evolix.cfg regexp: '^command\[check_memcached\]=' - line: 'command[check_memcached]=/usr/local/lib/nagios/plugins/check_memcached.pl -H 127.0.0.1 -p {{ memcached_port }}' + line: 
'command[check_memcached]=/usr/local/lib/monitoringctl/alerts_wrapper --name memcached /usr/local/lib/nagios/plugins/check_memcached.pl -H 127.0.0.1 -p {{ memcached_port }}' notify: restart nagios-nrpe-server when: memcached_instance_name | length == 0 @@ -42,7 +42,7 @@ ansible.builtin.lineinfile: name: /etc/nagios/nrpe.d/evolix.cfg regexp: '^command\[check_memcached\]=' - line: 'command[check_memcached]=/usr/local/lib/nagios/plugins/check_memcached_instances' + line: 'command[check_memcached]=/usr/local/lib/monitoringctl/alerts_wrapper --name memcached /usr/local/lib/nagios/plugins/check_memcached_instances' notify: restart nagios-nrpe-server when: memcached_instance_name | length > 0 diff --git a/minifirewall/tasks/nrpe.yml b/minifirewall/tasks/nrpe.yml index 691dd454..33686cb3 100644 --- a/minifirewall/tasks/nrpe.yml +++ b/minifirewall/tasks/nrpe.yml @@ -46,7 +46,7 @@ ansible.builtin.lineinfile: dest: /etc/nagios/nrpe.d/evolix.cfg regexp: 'command\[check_minifirewall\]' - line: 'command[check_minifirewall]=sudo {{ nagios_plugins_directory }}/check_minifirewall' + line: 'command[check_minifirewall]=/usr/local/lib/monitoringctl/alerts_wrapper --name minifirewall sudo {{ nagios_plugins_directory }}/check_minifirewall' notify: restart nagios-nrpe-server when: nrpe_evolix_cfg.stat.exists diff --git a/nagios-nrpe/files/alerts_switch b/nagios-nrpe/files/alerts_switch old mode 100644 new mode 100755 index 3c5a1417..f8d02834 --- a/nagios-nrpe/files/alerts_switch +++ b/nagios-nrpe/files/alerts_switch @@ -1,83 +1,143 @@ #!/bin/bash - -# https://forge.evolix.org/projects/evolix-private/repository # -# You should not alter this file. -# If you need to, create and customize a copy. 
- -set -e +# Source: +# https://gitea.evolix.org/evolix/ansible-roles/src/branch/stable/nagios-nrpe +# readonly PROGNAME=$(basename $0) -readonly PROGDIR=$(readlink -m $(dirname $0)) -readonly ARGS="$@" +readonly VERSION="24.06.00" -usage() { - echo "$PROGNAME action prefix" +# Load common functions and vars +readonly lib_dir="/usr/local/lib/monitoringctl" +if [ -r "${lib_dir}/common" ]; then + # shellcheck source=monitoringctl_common + source "${lib_dir}/common" +else + >&2 echo "Error: missing ${lib_dir}/common file." + exit 1 +fi + +if [ ! -e "${var_dir}" ]; then + >&2 echo "Warning: missing ${var_dir} directory." +fi + +function show_help() { + cat <] [--message ''] + $PROGNAME enable [--message ''] + $PROGNAME help + +WRAPPER_NAME: The name given to '--name' option of 'alerts_wrapper'. +DURATION: Duration of alert disabling. + Can be '1d' for 1 day, '5m' for 5 minutes or more complex + expressions like '1w2d10m42s' (if no time unit is provided, + hour is assumed) + Default value: 1h +DISABLE_MESSAGE: Message that will be logged and printed by alerts_wrapper + when alert is disabled. 
+ENABLE_MESSAGE: Message that will be logged when alert is enabled +END } -disable_alerts () { - disabled_file="$1_disabled" - enabled_file="$1_enabled" +function disable_alerts() { + # $1: wrapper name, $2: duration_sec, $3: disable message + now_secs=$(date +"%s") + disable_until_secs=$(( now_secs + ${2} )) + disable_file_path="$(get_disable_file_path "${1}")" + echo "${disable_until_secs}" > "${disable_file_path}" + echo "$(logname || echo unknown): \"${3}\"" >> "${disable_file_path}" + chmod 0644 "${disable_file_path}" + log "${1} alerts disabled by $(logname || echo unknown)" + log "Disable message: ${3}" +} - if [ -e "${enabled_file}" ]; then - mv "${enabled_file}" "${disabled_file}" - else - touch "${disabled_file}" - chmod 0644 "${disabled_file}" +function enable_alerts() { + # $1: wrapper name, $2: enable message + disable_file_path="$(get_disable_file_path "${1}")" + if [ -e "${disable_file_path}" ]; then + rm "${disable_file_path}" + fi + log "${1} alerts enabled by $(logname || echo unknown)" + log "Enable message: ${2}" +} + +function main() { + if [ "${action}" == 'enable' ]; then + if [ "${wrapper_name}" == "all" ]; then + for wrapper in $(get_wrappers_names); do + enable_alerts "${wrapper}" "${message}" + done + else + enable_alerts "${wrapper_name}" "${message}" + fi + elif [ "${action}" == 'disable' ]; then + duration_sec=$(time_to_seconds "${duration}") + if [ "${wrapper_name}" == "all" ]; then + for wrapper in $(get_wrappers_names); do + disable_alerts "${wrapper}" "${duration_sec}" "${message}" + done + else + disable_alerts "${wrapper_name}" "${duration_sec}" "${message}" + fi + elif [ "${action}" == 'help' ]; then + show_help fi } -enable_alerts () { - disabled_file="$1_disabled" - enabled_file="$1_enabled" - if [ -e "${disabled_file}" ]; then - mv "${disabled_file}" "${enabled_file}" - else - touch "${enabled_file}" - chmod 0644 "${enabled_file}" - fi -} +while :; do + case "${1}" in + enable|disable|help) + action="${1}" + shift;; + 
-d|--during) + if [ "$#" -gt 1 ]; then + if filter_duration "${2}"; then + duration="${2}" + else + usage_error "Option --during: \"${2}\" is not a valid duration." + fi + else + error "Missing --during argument." + fi + shift; shift;; + -m|--message) + if [ "$#" -gt 1 ]; then + message="${2}" + else + error "Missing --message argument." + fi + shift; shift;; + *) + if [ -n "${1}" ]; then + if is_wrapper "${1}" || [ "${1}" == "all" ]; then + wrapper_name="${1}" + else + error "Unknown argument '${1}', or NAME not defined in NRPE configuration." + fi + else + if [ -z "${action}" ]; then + error "Missing action argument." + elif [ -z "${1}" ]; then + break + fi + fi -now () { - date --iso-8601=seconds -} - -log_disable () { - echo "$(now) - alerts disabled by $(logname || echo unknown)" >> $1 -} - -log_enable () { - echo "$(now) - alerts enabled by $(logname || echo unknown)" >> $1 -} - -main () { - local action=$1 - local prefix=$2 - - local base_dir="/var/lib/misc" - mkdir -p "${base_dir}" - - local file_path="${base_dir}/${prefix}_alerts" - local log_file="/var/log/${prefix}_alerts.log" - - case "$action" in - enable) - enable_alerts ${file_path} - log_enable ${log_file} - ;; - disable) - disable_alerts ${file_path} - log_disable ${log_file} - ;; - help) - usage - ;; - *) - >&2 echo "Unknown action '$action'" - exit 1 - ;; + shift;; esac -} +done + +if [ -z "${wrapper_name}" ] && [ "${action}" != 'help' ] ; then + error "Missing WRAPPER_NAME." +fi + +if [ -z "${duration}" ]; then + duration="${default_disabled_time}" +fi + +readonly wrapper_name duration action + +main -main $ARGS diff --git a/nagios-nrpe/files/alerts_wrapper b/nagios-nrpe/files/alerts_wrapper old mode 100644 new mode 100755 index d4524fdd..0d7a00e8 --- a/nagios-nrpe/files/alerts_wrapper +++ b/nagios-nrpe/files/alerts_wrapper @@ -1,114 +1,101 @@ #!/bin/bash - -# https://forge.evolix.org/projects/evolix-private/repository # -# You should not alter this file. 
-# If you need to, create and customize a copy. +# Source: +# https://gitea.evolix.org/evolix/ansible-roles/src/branch/stable/nagios-nrpe +# -VERSION="21.04" -readonly VERSION +readonly PROGNAME=$(basename $0) +readonly VERSION="24.06.00" -# base functions +# Load common functions and vars +readonly lib_dir="/usr/local/lib/monitoringctl" +if [ -r "${lib_dir}/common" ]; then + # shellcheck source=monitoringctl_common + source "${lib_dir}/common" +else + >&2 echo "Error: missing ${lib_dir}/common file." + exit 1 +fi -show_version() { +if [ ! -e "${var_dir}" ]; then + >&2 echo "Warning: missing ${var_dir} directory." +fi + + +function show_help() { cat <, - Jérémy Lecour - and others. - -alerts_wrapper comes with ABSOLUTELY NO WARRANTY.This is free software, -and you are welcome to redistribute it under certain conditions. -See the GNU General Public License v3.0 for details. -END -} -show_help() { - cat < +Usage: alerts_wrapper (deprecated) Options - --limit max age of the "check file" ; - can be "1d" for 1 day, "5m" for 5 minutes… - or more complex expressions like "1w2d10m42s" - --name check name - -h, --help print this message and exit - -V, --version print version and exit + --name Wrapper name, it is very recommended to use the check name (like load, disk1…). + Special name: 'all' is already hard-coded. + -h, --help Print this message and exit. + -V, --version Print version and exit. 
END } -time_in_seconds() { - if echo "${1}" | grep -E -q '^([0-9]+[wdhms])+$'; then - echo "${1}" | sed 's/w/ * 604800 + /g; s/d/ * 86400 + /g; s/h/ * 3600 + /g; s/m/ * 60 + /g; s/s/ + /g; s/+ $//' | xargs expr - elif echo "${1}" | grep -E -q '^([0-9]+$)'; then - echo "${1} * 3600" | xargs expr - else - return 1 - fi -} - -delay_from_alerts_disabled_file() { - last_change=$(stat -c %Z "${alerts_disabled_file}") - limit_seconds=$(time_in_seconds "${wrapper_limit}" || time_in_seconds "${wrapper_limit_default}") - limit_date=$(date --date "${limit_seconds} seconds ago" +"%s") - - echo $(( last_change - limit_date )) -} - -enable_check() { +function enable_wrapper() { + # $1: wrapper name if [ "$(id -u)" -eq "0" ] ; then - /usr/local/bin/alerts_switch enable "${check_name}" + /usr/local/bin/alerts_switch enable "${1}" else - sudo /usr/local/bin/alerts_switch enable "${check_name}" + sudo /usr/local/bin/alerts_switch enable "${1}" fi } -main() { - ${check_command} > "${check_stdout}" - check_rc=$? - readonly check_rc +function main() { + is_disabled="$(is_disabled_wrapper "${wrapper_name}")" - delay=0 - - if [ -e "${alerts_disabled_file}" ]; then - delay=$(delay_from_alerts_disabled_file) - - if [ "${delay}" -le "0" ]; then - enable_check - fi + if [ -e "${disable_file}" ] && [ "${is_disabled}" = "False" ]; then + enable_wrapper "${wrapper_name}" fi - if [ -e "${alerts_disabled_file}" ]; then - formatted_last_change=$(date --date "@$(stat -c %Z "${alerts_disabled_file}")" +'%c') - readonly formatted_last_change + timeout_command="" + if [ "${is_disabled}" = "True" ]; then + timeout_command="timeout 8" + fi - echo "ALERTS DISABLED for ${check_name} (since ${formatted_last_change}, delay: ${delay} sec) - $(cat "${check_stdout}")" + check_stdout="$(${timeout_command} ${check_command})" + check_rc=$? 
+ + if [ "${is_disabled}" = "True" ] && [ "${check_rc}" -eq 124 ] && [ -z "${check_stdout}" ]; then + check_stdout="Check timeout (> 8 sec)" + fi + + if [ "${is_disabled}" = "True" ]; then + enable_time="$(get_enable_time "${wrapper_name}")" + enable_delay="$(enable_delay "${enable_time}")" + delay_str="$(delay_to_string "${enable_delay}")" + enable_date="$(date --date "+${enable_delay} seconds" "+%d %h %Y at %H:%M:%S")" + disable_msg="$(get_disable_message "${wrapper_name}")" + if [ -n "${disable_msg}" ]; then + disable_msg="- ${disable_msg} " + fi + echo "ALERT DISABLED until ${enable_date} (${delay_str} left) ${disable_msg}- Check output: ${check_stdout}" + else + echo "${check_stdout}" + fi + + if [ "${is_disabled}" = "True" ]; then if [ ${check_rc} = 0 ]; then - # Nagios OK - exit 0 + exit 0 # Nagios OK else - # Nagios WARNING - exit 1 + exit 1 # Nagios WARNING fi else - cat "${check_stdout}" exit ${check_rc} fi } -# Default: 1 day before re-enabling the check -wrapper_limit_default="1d" -readonly wrapper_limit_default if [[ "${1}" =~ -.* ]]; then # parse options # based on https://gist.github.com/deshion/10d3cb5f88a21671e17a while :; do - case $1 in + case "${1}" in -h|-\?|--help) show_help exit 0 @@ -117,47 +104,25 @@ if [[ "${1}" =~ -.* ]]; then show_version exit 0 ;; - - --limit) + -n|--name) # with value separated by space - if [ -n "$2" ]; then - wrapper_limit=$2 - shift - else - printf 'ERROR: "--limit" requires a non-empty option argument.\n' >&2 - exit 1 - fi - ;; - --limit=?*) - # with value speparated by = - wrapper_limit=${1#*=} - ;; - --limit=) - # without value - printf 'ERROR: "--limit" requires a non-empty option argument.\n' >&2 - exit 1 - ;; - - --name) - # with value separated by space - if [ -n "$2" ]; then - check_name=$2 + if [ -n "${2}" ]; then + wrapper_name="${2}" shift else printf 'ERROR: "--name" requires a non-empty option argument.\n' >&2 - exit 1 + exit 2 fi ;; - --name=?*) - # with value speparated by = - check_name=${1#*=} + 
--name=?*) + # with value separated by = + wrapper_name="${1#*=}" ;; - --name=) + --name=) # without value printf 'ERROR: "--name" requires a non-empty option argument.\n' >&2 - exit 1 + exit 2 ;; - --) # End of all options. shift @@ -165,8 +130,8 @@ if [[ "${1}" =~ -.* ]]; then ;; -?*) # ignore unknown options - printf 'WARN: Unknown option : %s\n' "$1" >&2 - exit 1 + printf 'ERROR: Unknown option : %s\n' "${1}" >&2 + exit 2 ;; *) # Default case: If no more options then break out of the loop. @@ -180,38 +145,22 @@ if [[ "${1}" =~ -.* ]]; then check_command="$*" else # no option is passed (backward compatibility with previous version) - # treat the first argument as check_name and the rest as the command - check_name="${1}" + # treat the first argument as wrapper_name and the rest as the command + wrapper_name="${1}" shift check_command="$*" fi -# Default values or errors -if [ -z "${wrapper_limit}" ]; then - wrapper_limit="${wrapper_limit_default}" -fi -if [ -z "${check_name}" ]; then - printf 'ERROR: You must specify a check name, with --name.\n' >&2 - exit 1 +if [ -z "${wrapper_name}" ]; then + printf 'ERROR: You must specify a wrapper name, with --name.\n' >&2 + exit 2 fi if [ -z "${check_command}" ]; then printf 'ERROR: You must specify a command to execute.\n' >&2 - exit 1 + exit 2 fi -readonly check_name -readonly check_command -readonly wrapper_limit -alerts_disabled_file="/var/lib/misc/${check_name}_alerts_disabled" -readonly alerts_disabled_file - -check_file="/var/lib/misc/${check_name}_alerts_disabled" -readonly check_file - -check_stdout=$(mktemp --tmpdir=/tmp "${check_name}_stdout.XXXX") -readonly check_stdout - -# shellcheck disable=SC2064 -trap "rm ${check_stdout}" EXIT +disable_file="$(get_disable_file_path "${wrapper_name}")" +readonly wrapper_name check_command disable_file main diff --git a/nagios-nrpe/files/check-local b/nagios-nrpe/files/check-local old mode 100755 new mode 100644 index 5d38d7a5..82a0ccaf --- 
a/nagios-nrpe/files/check-local +++ b/nagios-nrpe/files/check-local @@ -1,36 +1,9 @@ #!/usr/bin/env bash -CHECK_BIN=/usr/lib/nagios/plugins/check_nrpe +readonly orange="\e[0;33m" +readonly nocolor="\e[0m" -server_address="127.0.0.1" - -if ! test -f "${CHECK_BIN}"; then - echo "${CHECK_BIN} is missing, please install nagios-nrpe-plugin package." - exit 1 -fi - -for file in /etc/nagios/{nrpe.cfg,nrpe_local.cfg,nrpe.d/evolix.cfg}; do - if [ -r ${file} ]; then - command_search=$(grep "\[check_$1\]" "${file}" | grep -v '^[[:blank:]]*#' | tail -n1 | cut -d'=' -f2-) - fi - if [ -n "${command_search}" ]; then - command="${command_search}" - fi - - if [ -r ${file} ]; then - server_address_search=$(grep "server_address" "${file}" | grep -v '^[[:blank:]]*#' | cut -d'=' -f2) - fi - if [ -n "${server_address_search}" ]; then - server_address="${server_address_search}" - fi -done - -if [ -n "${command}" ]; then - echo "Found command in /etc/nagios (take care, in some cases, Nagios can play another command):" - echo " ${command}" -fi - -echo "NRPE daemon output:" -"${CHECK_BIN}" -H "${server_address}" -c "check_$1" +echo -e "${orange}'check-local' is now an alias for 'monitoringctl check'. 
See 'monitoringctl -h' for more information.${nocolor}" +monitoringctl check "${1}" diff --git a/nagios-nrpe/files/check-local_completion b/nagios-nrpe/files/check-local_completion old mode 100644 new mode 100755 index ea3ed64b..f3e3fee8 --- a/nagios-nrpe/files/check-local_completion +++ b/nagios-nrpe/files/check-local_completion @@ -1,10 +1,14 @@ #!/usr/bin/env bash +function _get_checks_names() { + grep --extended-regexp --no-filename --no-messages -R "command\[check_.*\]=" /etc/nagios/ | grep --invert-match --extended-regexp "^\s*#" | awk -F"[\\\[\\\]=]" '{sub("check_", "", $2); print $2}' | sort | uniq +} + +# List of available checks _check_local_dynamic_completion() { - local cur; - cur=${COMP_WORDS[COMP_CWORD]}; - COMPREPLY=(); - COMPREPLY=( $( compgen -W '$(grep "\[check_" -Rs /etc/nagios/ | grep -vE "^[[:blank:]]*#" | awk -F"[\\\[\\\]=]" "{print \$2}" | sed "s/check_//" | sort | uniq)' -- $cur ) ); + local cur=${COMP_WORDS[COMP_CWORD]}; + + COMPREPLY=( $( compgen -W '$(_get_checks_names)' -- "${cur}" ) ); } complete -F _check_local_dynamic_completion check-local diff --git a/nagios-nrpe/files/check_async b/nagios-nrpe/files/check_async old mode 100644 new mode 100755 diff --git a/nagios-nrpe/files/monitoringctl b/nagios-nrpe/files/monitoringctl new file mode 100755 index 00000000..15969aba --- /dev/null +++ b/nagios-nrpe/files/monitoringctl @@ -0,0 +1,634 @@ +#!/usr/bin/env bash + +#set -x + +readonly VERSION="24.06.00" + +function show_help() { + cat <&2 echo "${check_nrpe_bin} is missing, please install nagios-nrpe-plugin package." 
+ exit 1 + fi + + conf_lines="$(get_nrpe_conf "${nrpe_conf_path}")" + + server_address=$(echo "$conf_lines" | grep "server_address" | tail -n1 | cut -d'=' -f2) + if [ -z "${server_address}" ]; then server_address="127.0.0.1"; fi + + server_port=$(echo "$conf_lines" | grep "server_port" | tail -n1 | cut -d'=' -f2) + if [ -z "${server_port}" ]; then server_port="5666"; fi + + if [ -z "${1}" ] || [ "${1}" = "all" ]; then + # Array header for multi-checks + checks="$(get_checks_names)" + header="Check\tStatus\tOutput (truncated)" + underline="-----\t------\t------------------" + str_out="\n${header}\n${underline}\n" + else + checks="${1}" + fi + + for check in $checks; do + printf "\033[KChecking %s…\r" "${check}" + err_msg="" + if [ "${bypass_nrpe}" = "False" ]; then + request_command="${check_nrpe_bin} -H ${server_address} -p ${server_port} -c check_${check}" + else + check_commands="$(get_check_commands "${check}")" + if [ -n "${check_commands}" ]; then + check_command="$(echo "${check_commands}" | tail -n1)" + request_command="sudo -u nagios -- ${check_command}" + else + if [ -z "${1}" ] || [ "${1}" = "all" ]; then + err_msg="Check command not found in NRPE configuration." + else + err_msg="Error: no command found in NRPE configuration for check '${check}'. Aborted." + fi + fi + fi + if [ -z "${err_msg}" ]; then + check_output="$(${request_command} 2>&1)" # merge stderr here: a redirection stored inside a variable is passed as a literal argument + rc="$?" 
+ check_output="$(echo "${check_output}" | tr '\n' ' ')" + if [ -z "${1}" ] || [ "${1}" = "all" ]; then + if [ "${#check_output}" -gt 80 ]; then # same limit as the cut below, so the marker only appears when text is really truncated + check_output="$(echo "${check_output}" | cut -c-80) [...]" + fi + fi + else + check_output="${err_msg}" + rc="3" + fi + + case "${rc}" in + 0) + rc_str="OK" + color="${green}" + ;; + 1) + rc_str="Warning" + color="${orange}" + ;; + 2) + rc_str="Critical" + color="${red}" + ;; + 3) + rc_str="Unknown" + color="${purple}" + ;; + *) + rc_str="Unknown" + color="${purple}" + esac + + if [ -z "${1}" ] || [ "${1}" = "all" ]; then + str_out="${str_out}${color}${check}\t${rc_str}${nocolor}\t${check_output}\n" + fi + done + + if [ -z "${1}" ] || [ "${1}" = "all" ]; then + echo -e "${str_out}" | column -t -s $'\t' + else + printf "\033[K\n" # erase tmp line « Checking check_toto…» + if [ "${bypass_nrpe}" = "False" ]; then + echo -e "NRPE service output (on ${server_address}:${server_port}):\n" + else + echo -e "Direct check output (bypassing NRPE):\n" + fi + echo -e "${color}${check_output}${nocolor}\n" | sed 's/|/\n/g' + exit "${rc}" + fi +} + +# Print error message and exit if not installed +function alerts_switch_is_installed() { + if ! command -v alerts_switch &> /dev/null; then + error "Error: script 'alerts_switch' is not installed. Aborted." + fi +} + +function disable_alerts() { + # $1: check name | all + # $2: disable message + alerts_switch_is_installed + + if [ "${1}" = "all" ]; then + checks="$(get_checks_names)" + else + checks="${1}" + fi + + warn_not_wrapped "${checks}" + warn_wrapper_names "${checks}" + + if [ -z "${2}" ]; then + if [ "${is_interactive}" = "False" ]; then + error "Error: disable message option is mandatory in non-interactive shell." + fi + echo -n "> Please provide a disable message (for logging and check output): " + read -r message + echo '' + if [ -z "${message}" ]; then + error "${red}Error:${nocolor} disable message is mandatory." 
+ fi + else + message="${2}" + fi + + default_msg="" + if [ "${default_duration}" = "True" ]; then + default_msg=" (use --during to change default time)" + fi + + if [ "${1}" = "all" ]; then + check_txt="All checks" + else + check_txt="Check ${1}" + fi + + echo_box "${check_txt} will be disabled for ${duration}${default_msg}." + cat <&2 echo -e "${orange}Warning:${nocolor} because they have the same configuration, disabling ${1} will disable: ${checks}.\n" + log "Warning: disabling ${1} will disable ${checks} (which have the same wrapper name)." + fi + else + wrapper="all" + fi + + if [ "${is_interactive}" = "True" ]; then + echo -n "> Confirm (y/N)? " + read -r answer + if [ "${answer}" != "Y" ] && [ "${answer}" != "y" ]; then + echo -e "${orange}Canceled.${nocolor}" && exit 0 + fi + fi + + log "Action disable ${1} requested for ${duration} by user $(logname || echo unknown)." + + alerts_switch disable "${wrapper}" --during "${duration}" --message "${message}" + + if [ "${1}" != "all" ]; then + if [ "${n_checks}" -eq 1 ]; then + echo -e "${orange}Check ${1} alerts are now disabled for ${duration}.${nocolor}" + else + echo -e "${orange}Alerts are now disabled for ${duration} for checks: ${checks}.${nocolor}" + fi + else + echo -e "${orange}All alerts are now disabled for ${duration}.${nocolor}" + fi +} + +function enable_alerts() { + # $1: check name, $2: enable message + alerts_switch_is_installed + + if [ "${1}" != "all" ]; then + # Verify that check is not already enabled + is_disabled="$(is_disabled_check "${1}")" + if [ "${is_disabled}" = "False" ]; then + echo "${1} is already enabled, see 'monitoringctl status'" + exit 0 + fi + fi + + if [ -z "${2}" ]; then + if [ "${is_interactive}" = "False" ]; then + error "Error: enable message option is mandatory in non-interactive shell." 
+ fi + echo -n "> Please provide an enable message (for logging): " + read -r message + echo '' + if [ -z "${message}" ]; then + error "${red}Error:${nocolor} enable message is mandatory." + fi + else + message="${2}" + fi + + log "Action enable ${1} requested by user $(logname || echo unknown)." + + if [ "${1}" != "all" ]; then + if is_check "${1}"; then + wrapper="$(get_check_wrapper_name "${1}")" + else + wrapper="${1}" + fi + checks="$(get_wrapper_checks "${wrapper}")" + n_checks="$(echo "${checks}" | wc -w)" + if [ "${n_checks}" -gt 1 ]; then + >&2 echo -e "${orange}Warning:${nocolor} because they have the same configuration, enabling ${1} will enable: ${checks}.\n" + log "Warning: enabling ${1} will enable ${checks} (which have the same wrapper name)." + fi + else + wrapper="all" + fi + + alerts_switch enable "${wrapper}" --message "${message}" + + if [ "${1}" != "all" ]; then + if [ "${n_checks}" -eq 1 ]; then + echo -e "${green}Check ${1} alerts are now enabled.${nocolor}" + else + echo -e "${green}Alerts are now enabled for checks: ${checks}.${nocolor}" + fi + else + echo -e "${green}All alerts are now enabled.${nocolor}" + fi +} + +# Show NRPE command(s) configured for a check +function show_check_commands() { + # $1: check name + check_commands=$(get_check_commands "${1}") + + if [ -z "${check_commands}" ]; then + usage_error "Error: no command found in NRPE configuration for check '${1}'." + fi + + n_commands="$(echo "${check_commands}" | wc -l)" + if [ "${n_commands}" -ne 1 ]; then + echo "Available commands (in config order, the last one overwrites the others):" + echo " $check_commands" + fi + + check_command=$(echo "${check_commands}" | tail -n1) + echo "Command used by NRPE:" + echo " ${check_command}" +} + +# Print a warning if some wrappers have the same name +# or if a name is different from the check. 
+function warn_wrapper_names() { + #$1: checks to verify + warned="False" + for check in ${1}; do + wrapper_name="$(get_check_wrapper_name "${check}")" + if [ -n "${wrapper_name}" ] && [ "${wrapper_name}" != "${check}" ]; then + >&2 echo -e "${orange}Warning:${nocolor} ${check} check has wrapper name ${wrapper_name}." + warned="True" + fi + done + if [ "${warned}" = "True" ]; then + >&2 echo -e "${orange}It is recommended to name the wrappers the same as the checks.${nocolor}\n" + fi +} + +# Print a warning if some checks are not wrapped +function warn_not_wrapped() { + #$1: checks to verify + unwrappeds="$(not_wrapped_checks)" + unwrapped_checks="$(comm -12 <(echo "${1}") <(echo "${unwrappeds}"))" + if [ -n "${unwrapped_checks}" ]; then + n_checks="$(echo "${1}" | wc -w)" + n_unwrapped="$(echo "${unwrapped_checks}" | wc -w)" + if [ "${n_unwrapped}" == "${n_checks}" ]; then + if [ "${n_unwrapped}" -eq 1 ]; then + error "${red}Error:${nocolor} ${1} check is not wrapped, it cannot be disabled." + else + error "${red}Error:${nocolor} these checks are not wrapped, they cannot be disabled: $(echo "${unwrapped_checks}" | xargs)" + fi + else + if [ "${n_unwrapped}" -eq 1 ]; then + >&2 echo -e "${orange}Warning:${nocolor} ${unwrapped_checks} check is not wrapped, it will not be disabled." + else + >&2 echo -e -n "${orange}Warning:${nocolor} some checks are not wrapped, they will not be disabled: $(echo "${unwrapped_checks}" | xargs)\n\n" + fi + fi + + log "Warning: some checks have no alerts_wrapper, they will not be disabled: $(echo "${unwrapped_checks}" | xargs)" + fi +} + +# Echo a message in a box +function echo_box() { + # $1: message + msg_len="${#1}" + line="$(printf '─%.0s' $(eval "echo {1.."${msg_len}"}"))" + cat <&2 echo "Error: missing ${lib_dir}/common file." + exit 1 +fi + +if [[ ! 
"${PATH}" =~ /usr/local/bin ]]; then + PATH="/usr/local/bin:${PATH}" +fi + +# Must be root +if [ "$(id -u)" -ne 0 ]; then + >&2 echo "You need to be root (or use sudo) to run ${0}!" + exit 1 +fi + +# No argument +if [ "$#" = "0" ]; then + show_help + exit 1 +fi + +# Default arguments and options +action="" +message="" +duration="${default_disabled_time}" +bypass_nrpe="False" +default_duration="True" + +# Parse arguments and options +while :; do + case "${1}" in + -h|-\?|--help) + show_help + exit 0;; + -V|--version) + show_version + exit 0;; + -b|--bypass-nrpe) + bypass_nrpe="True" + shift;; + -d|--during) + if [ "${default_duration}" = "False" ]; then + usage_error "Option --during: defined multiple times." + fi + if [ "$#" -lt 2 ]; then + usage_error "Option --during: missing value." + fi + if filter_duration "${2}"; then + duration="${2}" + else + usage_error "Option --during: \"${2}\" is not a valid duration." + fi + default_duration="False" + shift; shift;; + -m|--message) + if [ "$#" -lt 2 ]; then + usage_error "Option --message: missing message string." + fi + message="${2}" + shift; shift;; + status|check|enable|disable|show|list) + action="${1}" + shift;; + *) + if [ -z "${1}" ]; then + break + fi + + case "${action}" in + status|check) + if is_check "${1}" || [ "${1}" = "all" ]; then + check_name="${1}" + else + usage_error "Action ${action}: unknown check '${1}'." + fi + ;; + show) + if is_check "${1}"; then + check_name="${1}" + else + usage_error "Action ${action}: unknown check '${1}'." + fi + ;; + enable|disable) + if is_wrapper "${1}" || is_check "${1}" || [ "${1}" = "all" ]; then + check_name="${1}" + else + # We use the word "check" for the end user, + # but this is actually "unknown wrapper" + usage_error "Action ${action}: unknown check '${1}'." + fi + ;; + *) + usage_error "Missing or invalid ACTION argument." + ;; + esac + shift;; + esac +done + + +if [ "$#" -gt 0 ]; then + usage_error "Too many arguments." 
+fi + +case "${action}" in + disable|enable|show) + if [ -z "${check_name}" ]; then + usage_error "Action ${action}: missing CHECK_NAME argument." + fi + ;; +esac + +if [ ! "${action}" = "disable" ]; then + if [ "${default_duration}" = "False" ]; then + usage_error "Action ${action}: there is no --during option." + fi +fi + +case "${action}" in + list) + list_checks + ;; + status) + alerts_status "${check_name}" + ;; + check) + check "${check_name}" + ;; + show) + show_check_commands "${check_name}" + ;; + enable) + enable_alerts "${check_name}" "${message}" + ;; + disable) + disable_alerts "${check_name}" "${message}" + ;; +esac + diff --git a/nagios-nrpe/files/monitoringctl_common b/nagios-nrpe/files/monitoringctl_common new file mode 100644 index 00000000..0de87383 --- /dev/null +++ b/nagios-nrpe/files/monitoringctl_common @@ -0,0 +1,292 @@ +#!/usr/bin/env bash + +# Location of disable files +readonly var_dir="/var/lib/monitoringctl" + +readonly log_file="/var/log/monitoringctl.log" + +readonly nrpe_conf_path="/etc/nagios/nrpe.cfg" + +debian_major_version="$(cut -d "." -f 1 < /etc/debian_version)" +readonly debian_major_version + +# If no time limit is provided in CLI or found in file, this value is used +readonly default_disabled_time="1h" + +_nrpe_conf_lines='' # populated at the end of the file + + +function error() { + # $1: error message + >&2 echo -e "${1}" + exit 1 +} + +function usage_error() { + # $1: error message + >&2 echo "${1}" + >&2 echo "Execute \"${PROGNAME} --help\" for information on usage." + exit 1 +} + +function log() { + # $1: message + echo "$(now_iso) - ${PROGNAME}: ${1}" >> "${log_file}" +} + +function show_version() { + cat <, + Jérémy Lecour + and others. + +${PROGNAME} comes with ABSOLUTELY NO WARRANTY.This is free software, +and you are welcome to redistribute it under certain conditions. +See the GNU General Public License v3.0 for details. 
# Fail if argument is not a duration string. Accepted forms:
#   - units in decreasing order, each one optional: XwXdXhXmXs
#     (e.g. 1w, 2d12h, 1h30m, 30s)
#   - shorthand ending with a bare number, which takes the unit just below
#     the preceding one: XhY (Y minutes), XmY and XhYmZ (trailing seconds),
#     optionally prefixed by weeks and/or days (e.g. 1h30, 1m30, 1d1h30)
# Every string accepted by the previous implementation is still accepted,
# including the empty string (time_to_seconds rejects the latter).
function filter_duration() {
    # $1: duration in format specified above
    _time_regex="^([0-9]+w)?([0-9]+d)?(([0-9]+h)?([0-9]+m)?([0-9]+s)?|[0-9]+h[0-9]+|([0-9]+h)?[0-9]+m[0-9]+)$"
    if [[ "${1}" =~ ${_time_regex} ]]; then
        return 0
    fi
    return 1
}

# Convert human writable duration into seconds (printed on stdout).
# Understands every non-empty form accepted by filter_duration.
# Calls error() (which exits) when the string cannot be converted.
function time_to_seconds() {
    # $1: formated time string
    _duration="${1}"

    # Shorthand: give the trailing bare number its implicit unit so that
    # a single conversion path handles every form.
    _bare_minutes_regex='^([0-9]+[wdhms])*[0-9]+h[0-9]+$'
    _bare_seconds_regex='^([0-9]+[wdhms])*[0-9]+m[0-9]+$'
    if [[ "${_duration}" =~ ${_bare_minutes_regex} ]]; then
        _duration="${_duration}m"
    elif [[ "${_duration}" =~ ${_bare_seconds_regex} ]]; then
        _duration="${_duration}s"
    fi

    if echo "${_duration}" | grep -E -q '^([0-9]+[wdhms])+$'; then
        # Turn "1h30m" into the expression "1 * 3600 + 30 * 60 " and let
        # expr evaluate it (xargs passes "*" literally, without globbing).
        echo "${_duration}" | sed 's/w/ * 604800 + /g; s/d/ * 86400 + /g; s/h/ * 3600 + /g; s/m/ * 60 + /g; s/s/ + /g; s/+ $//' | xargs expr
    else
        error "Invalid duration: '${1}'."
    fi
}

# Print re-enable time in secs (epoch).
# Prints nothing when the wrapper has no disable file.
function get_enable_time() {
    # $1: wrapper name
    _disable_file_path="$(get_disable_file_path "${1}")"
    if [ ! -e "${_disable_file_path}" ]; then
        return
    fi

    # First non-comment, non-empty line, kept only if it is a plain integer
    _enable_secs="$(grep -v -E "^\s*#" "${_disable_file_path}" | sed '/^$/d' | head -n1 | awk '/^[0-9]+$/ {print $1}')"
    # If file is empty, use file last change date plus default disabled time
    if [ -z "${_enable_secs}" ]; then
        _file_last_change_secs="$(stat -c %Z "${_disable_file_path}")"
        _default_disabled_time_secs="$(time_to_seconds "${default_disabled_time}")"
        _enable_secs="$(( _file_last_change_secs + _default_disabled_time_secs ))"
    fi
    echo "${_enable_secs}"
}

# Print disable message: every non-empty line of the disable file after
# the first one, joined on a single line with whitespace squeezed.
# Prints nothing when the wrapper has no disable file.
function get_disable_message() {
    # $1: wrapper name
    _disable_file_path="$(get_disable_file_path "${1}")"
    if [ ! -e "${_disable_file_path}" ]; then
        return
    fi

    _disable_msg="$(sed '/^$/d' "${_disable_file_path}" | tail -n+2 | tr '\n' ' ' | awk '{$1=$1;print}')"
    echo "${_disable_msg}"
}

# Print current time in seconds since the epoch
function now_secs() {
    date +"%s"
}

# Print current time in ISO-8601 format, with seconds precision
function now_iso() {
    date --iso-8601=seconds
}

# Print delay before re-enable in secs (negative once the time has passed)
function enable_delay() {
    # $1: re-enable time in secs
    echo $(( ${1} - $(now_secs) ))
}

# Converts delay (in seconds) into human readable duration, e.g. 1d2h3m4s.
# Components equal to zero are omitted, so a delay of 0 prints an empty line.
function delay_to_string() {
    # $1: delay in secs
    _delay_days="$(( ${1} /86400 ))"
    if [ "${_delay_days}" -eq 0 ]; then _delay_days=""
    else _delay_days="${_delay_days}d"; fi

    _delay_hours="$(( (${1} %86400) /3600 ))"
    if [ "${_delay_hours}" -eq 0 ]; then _delay_hours=""
    else _delay_hours="${_delay_hours}h"; fi

    _delay_minutes="$(( ((${1} %86400) %3600) /60 ))"
    if [ "${_delay_minutes}" -eq 0 ]; then _delay_minutes=""
    else _delay_minutes="${_delay_minutes}m"; fi

    _delay_seconds="$(( ((${1} %86400) %3600) %60 ))"
    if [ "${_delay_seconds}" -eq 0 ]; then _delay_seconds=""
    else _delay_seconds="${_delay_seconds}s"; fi

    echo "${_delay_days}${_delay_hours}${_delay_minutes}${_delay_seconds}"
}

# Print "True" if the alerts of the wrapper attached to this check are
# currently disabled, "False" otherwise.
function is_disabled_check() {
    # $1: check name
    _wrapper="$(get_check_wrapper_name "${1}")"
    is_disabled_wrapper "${_wrapper}"
}

# Print "True" if a disable file exists for this wrapper and its re-enable
# time is still in the future, "False" otherwise.
function is_disabled_wrapper() {
    # $1: wrapper name
    _wrapper="${1}"
    _disable_file_path="$(get_disable_file_path "${_wrapper}")"
    if [ -e "${_disable_file_path}" ]; then
        _enable_time="$(get_enable_time "${_wrapper}")"
        _enable_delay="$(enable_delay "${_enable_time}")"
        if [ "${_enable_delay}" -le "0" ]; then
            echo "False"
        else
            echo "True"
        fi
    else
        echo False
    fi
}

# Print the path of the disable file of a wrapper
function get_disable_file_path() {
    # $1: wrapper name
    echo "${var_dir}/${1}_alerts_disabled"
}
# Print NRPE configuration, with includes, without comments
# and in the same order than NRPE does (taking account that
# order changes from Deb10)
function get_nrpe_conf() {
    echo "${_nrpe_conf_lines}"
}

# Private function to recursively get NRPE conf from file.
# Prints every non-comment, non-blank line, replacing "include=" and
# "include_dir=" directives by the content they point to.
function _get_conf_from_file() {
    # $1: NRPE conf file (.cfg)
    if [ ! -f "${1}" ]; then return; fi

    _conf_lines=$(grep -E -v "^\s*(#.*|)$" "${1}")
    # "read" strips leading blanks, so directives can be matched as prefixes.
    # Matching only at the start of the line keeps command definitions whose
    # arguments merely contain "include=" (e.g. "--include=foo").
    while read -r _line; do
        if [[ "${_line}" == include=* ]]; then
            # Everything after the first "=", so paths may contain "="
            _conf_file="${_line#*=}"
            _get_conf_from_file "${_conf_file}"
        elif [[ "${_line}" == include_dir=* ]]; then
            _conf_dir="${_line#*=}"
            _get_conf_from_dir "${_conf_dir}"
        else
            echo "${_line}"
        fi
    done <<< "${_conf_lines}"
}

# Private function to recursively get NRPE conf from directory
function _get_conf_from_dir() {
    # $1: NRPE conf dir
    if [ ! -d "${1}" ]; then return; fi

    if [ "${debian_major_version}" -ge 10 ]; then
        # From Deb10, NRPE use scandir() with alphasort() function
        _sort_command="sort"
    else
        # Before Deb10, NRPE reads entries in directory order, like find utility
        _sort_command="cat -"
    fi

    # Add conf files in dir to be processed recursively.
    # -mindepth 1 keeps the directory itself out of the results: a directory
    # named "*.cfg" would otherwise be returned and recursed into forever.
    for _file in $(find "${1}" -mindepth 1 -maxdepth 1 -name "*.cfg" 2> /dev/null | ${_sort_command}); do
        if [ -f "${_file}" ]; then
            _get_conf_from_file "${_file}"
        elif [ -d "${_file}" ]; then
            _get_conf_from_dir "${_file}"
        fi
    done
}

# Print the checks that are configured in NRPE
# (the "xxx" of every "command[check_xxx]=" line, sorted, without duplicates)
function get_checks_names() {
    echo "${_nrpe_conf_lines}" | grep -E "command\[check_.*\]=" | awk -F"[\\\[\\\]=]" '{sub("check_", "", $2); print $2}' | sort | uniq
}

# Print the commands defined for check $1 in NRPE configuration
# (one per line, in configuration order)
function get_check_commands() {
    # $1: check name
    echo "${_nrpe_conf_lines}" | grep -E "command\[check_${1}\]" | cut -d'=' -f2-
}

# Print the checks that have no alerts_wrapper in NRPE configuration
function not_wrapped_checks() {
    for _check in $(get_checks_names); do
        if ! is_wrapped "${_check}"; then
            echo "${_check}"
        fi
    done
}

# Fail if check is not wrapped.
# Only the last definition of the check is considered.
function is_wrapped() {
    # $1: check name
    _cmd=$(get_check_commands "${1}" | tail -n1)
    if echo "${_cmd}" | grep --quiet --no-messages alerts_wrapper; then
        return 0
    fi
    return 1
}

# Print the names that are defined in the wrappers of the checks
# (argument of -n/--name, comma-separated lists being split)
function get_wrappers_names() {
    echo "${_nrpe_conf_lines}" | grep -s "alerts_wrapper" | awk '{ for (i=1 ; i<=NF; i++) { if ($i ~ /^(-n|--name)$/) { print $(i+1); break } } }' | tr ',' '\n' | sort | uniq
}

# Print the wrapper name of the check (nothing if the check is not wrapped).
# Only the last definition of the check is considered.
function get_check_wrapper_name() {
    # $1: check name
    _cmd=$(get_check_commands "${1}" | tail -n1)
    if echo "${_cmd}" | grep --quiet --no-messages alerts_wrapper; then
        # Portable awk only: the 3-argument match() is a gawk extension and
        # is not available in mawk. Accepts -n as well as --name, like
        # get_wrappers_names, and keeps the leading [a-zA-Z0-9_-]* part of
        # the argument.
        echo "${_cmd}" | awk '{
            for (i = 1; i < NF; i++) {
                if ($i ~ /^(-n|--name)$/) {
                    match($(i+1), /^[a-zA-Z0-9_-]*/)
                    print substr($(i+1), 1, RLENGTH)
                    exit
                }
            }
        }'
    fi
}

# Fail if $1 is not the name of a check configured in NRPE
function is_check() {
    # $1: check name
    _checks="$(get_checks_names)"
    # Fixed-string, whole-line match: the name is not a regex
    if echo "${_checks}" | grep --quiet -F -x -- "${1}"; then
        return 0
    fi
    return 1
}

# Fail if $1 is not a wrapper name used in NRPE configuration
function is_wrapper() {
    # $1: wrapper name
    _wrappers="$(get_wrappers_names)"
    # Fixed-string, whole-line match: the name is not a regex
    if echo "${_wrappers}" | grep --quiet -F -x -- "${1}"; then
        return 0
    fi
    return 1
}

# Print the checks that name this wrapper (space separated, on one line)
function get_wrapper_checks() {
    # $1: wrapper name
    # The name must be a whole -n/--name argument (or a whole element of a
    # comma-separated one), so that "mysql" does not select "mysql_slave".
    echo "${_nrpe_conf_lines}" | grep -E "command\[check_.*\]=" | grep -E -- "(^|\s)(-n|--name)\s+([^[:space:],]+,)*${1}(,|\s|\$)" | awk -F"[\\\[\\\]=]" '{sub("check_", "", $2); print $2}' | sort | uniq | xargs
}


# Load NRPE configuration
_nrpe_conf_lines="$(_get_conf_from_file "${nrpe_conf_path}")"
# Print the wrapper names (argument of -n/--name) found on the non-comment
# alerts_wrapper lines below /etc/nagios/, sorted, without duplicates.
function _get_wrappers_names() {
    grep "alerts_wrapper" --no-filename --no-messages -R /etc/nagios/ | grep --invert-match --extended-regexp "^\s*#" | awk '{ for (i=1 ; i<=NF; i++) { if ($i ~ /^(-n|--name)$/) { print $(i+1); break } } }' | tr ',' '\n' | sort | uniq
}

# Print the names of the checks ("xxx" of "command[check_xxx]=") found on
# the non-comment lines below /etc/nagios/, sorted, without duplicates.
function _get_checks_names() {
    grep --extended-regexp --no-filename --no-messages -R "command\[check_.*\]=" /etc/nagios/ | grep --invert-match --extended-regexp "^\s*#" | awk -F"[\\\[\\\]=]" '{sub("check_", "", $2); print $2}' | sort | uniq
}

# Completion function for monitoringctl: fills COMPREPLY with the candidates
# matching the word being completed, according to the action already typed.
function _monitoringctl_completion() {
    local cur="${COMP_WORDS[COMP_CWORD]}"
    local prev="${COMP_WORDS[COMP_CWORD-1]}"
    # This function runs inside the user's interactive shell: every working
    # variable is local so that completing never clobbers the user's own
    # variables (i, w, check, opts, ...).
    local action="" words="--help" checks="" check=""
    local w c i j
    local -a opts=()

    # Last action keyword present on the command line
    for w in "${COMP_WORDS[@]}"; do
        case "$w" in
            status|check|enable|disable|show|list)
                action="${w}"
                ;;
        esac
    done

    case "${action}" in
        check|show)
            # Offer check names only while none has been typed yet
            checks="$(_get_checks_names)"
            for w in "${COMP_WORDS[@]}"; do
                for c in ${checks}; do
                    if [ "${c}" == "${w}" ]; then
                        check="${w}"
                        break
                    fi
                done
            done
            if [ -z "${check}" ]; then
                words="${checks} ${words}"
            fi
            if [ "${action}" == "check" ]; then
                words="all --bypass-nrpe ${words}"
            fi
            ;;
        status)
            if [ "${prev}" == "status" ]; then
                words="all $(_get_checks_names)"
            fi
            ;;
        enable)
            if [ "${prev}" == "enable" ]; then
                words="all $(_get_wrappers_names)"
            else
                words="--message ${words}"
            fi
            ;;
        disable)
            if [ "${prev}" == "disable" ]; then
                words="all $(_get_wrappers_names)"
            elif [ "${prev}" == "-d" ] || [ "${prev}" == "--during" ]; then
                words="1d 1d12h 1h 1h30m 1m 1m30s 30s"
            else
                words="--during --message ${words}"
            fi
            ;;
        *)
            words="status check enable disable show list ${words}"
            ;;
    esac

    # Avoid double: drop candidates already present on the command line
    for i in ${words}; do
        for j in "${COMP_WORDS[@]}"; do
            if [[ "$i" == "$j" ]]; then
                continue 2
            fi
        done
        opts+=("$i")
    done

    COMPREPLY=($(compgen -W "${opts[*]}" -- "${cur}"))
    return 0
}

complete -F _monitoringctl_completion monitoringctl
---
# Install monitoringctl and its helpers: alerts_wrapper, alerts_switch,
# the shared "common" library, bash completions and the check-local
# backward-compatibility wrapper.

- name: "Remount /usr if needed"
  ansible.builtin.include_role:
    name: remount-usr

### alerts_wrapper and alerts_switch section

- name: "dir /usr/local/lib/monitoringctl/ exists"
  ansible.builtin.file:
    path: /usr/local/lib/monitoringctl/
    state: directory
    mode: "0755"

- name: "check if old alerts_switch script is present"
  ansible.builtin.stat:
    path: /usr/share/scripts/alerts_switch
  register: old_alerts_switch

# "creates" makes the move a no-op once the new location is populated.
- name: "alerts_switch is at the right place"
  ansible.builtin.command:
    cmd: "mv /usr/share/scripts/alerts_switch /usr/local/bin/alerts_switch"
    creates: /usr/local/bin/alerts_switch
  when: old_alerts_switch.stat.exists

- name: "copy alerts_switch"
  ansible.builtin.copy:
    src: alerts_switch
    dest: /usr/local/bin/alerts_switch
    owner: root
    group: root
    mode: "0750"
    force: true

# Skipped in check mode, like the alerts_wrapper symlink below: the "mv"
# above does not run in check mode, so the old path may still be a regular
# file that cannot be turned into a link.
- name: "alerts_switch symlink for backward compatibility"
  ansible.builtin.file:
    src: /usr/local/bin/alerts_switch
    path: /usr/share/scripts/alerts_switch
    state: link
  when:
    - old_alerts_switch.stat.exists
    - not ansible_check_mode

# sudoers drop-ins are conventionally read-only (0440).
- name: "nagios user can run alerts_switch with sudo (used by alerts_wrapper)"
  ansible.builtin.lineinfile:
    path: /etc/sudoers.d/monitoringctl
    regexp: "nagios.*alerts_switch"
    line: "nagios ALL = NOPASSWD:/usr/local/bin/alerts_switch *"
    create: true
    owner: root
    group: root
    mode: "0440"
    validate: "visudo -c -f %s"

- name: "check if old alerts_wrapper script is present"
  ansible.builtin.stat:
    path: "{{ nagios_plugins_directory }}/alerts_wrapper"
  register: old_alerts_wrapper

- name: "alerts_wrapper is at the right place"
  ansible.builtin.command:
    cmd: "mv {{ nagios_plugins_directory }}/alerts_wrapper /usr/local/lib/monitoringctl/alerts_wrapper"
    creates: /usr/local/lib/monitoringctl/alerts_wrapper
  when: old_alerts_wrapper.stat.exists

- name: "copy alerts_wrapper"
  ansible.builtin.copy:
    src: alerts_wrapper
    dest: /usr/local/lib/monitoringctl/alerts_wrapper
    owner: root
    group: staff
    mode: "0755"
    force: true

- name: "alerts_wrapper symlink for backward compatibility"
  ansible.builtin.file:
    src: /usr/local/lib/monitoringctl/alerts_wrapper
    path: "{{ nagios_plugins_directory }}/alerts_wrapper"
    state: link
  when:
    - old_alerts_wrapper.stat.exists
    - not ansible_check_mode

# Installed once here (this task used to be repeated in the monitoringctl
# section below).
- name: "copy monitoringctl_common lib"
  ansible.builtin.copy:
    src: monitoringctl_common
    dest: /usr/local/lib/monitoringctl/common
    owner: root
    group: root
    mode: "0644"
    force: true


### monitoringctl section

- name: "package bash-completion is installed"
  ansible.builtin.apt:
    name: bash-completion

- name: "package nagios-nrpe-plugin is installed"
  ansible.builtin.apt:
    name: nagios-nrpe-plugin

- name: "directory /etc/bash_completion.d exists"
  ansible.builtin.file:
    path: /etc/bash_completion.d
    state: directory
    mode: "0755"

- name: "dir /var/lib/monitoringctl/ exists"
  ansible.builtin.file:
    path: /var/lib/monitoringctl/
    state: directory
    mode: "0755"

- name: "monitoringctl is not in /usr/local/sbin/"
  ansible.builtin.file:
    path: /usr/local/sbin/monitoringctl
    state: absent

- name: "copy monitoringctl"
  ansible.builtin.copy:
    src: monitoringctl
    dest: /usr/local/bin/monitoringctl
    owner: root
    group: root
    mode: "0755"
    force: true

- name: "copy monitoringctl_completion script"
  ansible.builtin.copy:
    src: monitoringctl_completion
    dest: /etc/bash_completion.d/monitoringctl
    owner: root
    group: root
    mode: "0644"
    force: true

- name: "copy check-local (it's just a wrapper calling 'monitoringctl check' for backward compatibility)"
  ansible.builtin.copy:
    src: check-local
    dest: /usr/local/bin/check-local
    owner: root
    group: root
    mode: "0755"
    force: true

# Completion files are sourced, not executed: same owner/group/mode as the
# monitoringctl completion above.
- name: "copy completion for check-local"
  ansible.builtin.copy:
    src: check-local_completion
    dest: /etc/bash_completion.d/check-local
    owner: root
    group: root
    mode: "0644"
    force: true
the right place - ansible.builtin.command: - cmd: "mv /usr/share/scripts/alerts_switch /usr/local/bin/alerts_switch" - args: - creates: /usr/local/bin/alerts_switch - when: old_alerts_switch.stat.exists - -- name: "copy alerts_switch" - ansible.builtin.copy: - src: alerts_switch - dest: /usr/local/bin/alerts_switch - owner: root - group: root - mode: "0750" - force: true - -- name: "symlink for backward compatibility" - ansible.builtin.file: - src: /usr/local/bin/alerts_switch - dest: /usr/share/scripts/alerts_switch - state: link - when: old_alerts_switch.stat.exists - -- name: "copy alerts_wrapper" - ansible.builtin.copy: - src: alerts_wrapper - dest: "{{ nagios_plugins_directory }}/alerts_wrapper" - owner: root - group: staff - mode: "0755" - force: true \ No newline at end of file diff --git a/nagios-nrpe/templates/evolix.cfg.j2 b/nagios-nrpe/templates/evolix.cfg.j2 index 250038af..71d48d1f 100644 --- a/nagios-nrpe/templates/evolix.cfg.j2 +++ b/nagios-nrpe/templates/evolix.cfg.j2 @@ -6,95 +6,102 @@ # Allowed IPs allowed_hosts={{ nagios_nrpe_allowed_hosts | join(',') }} -# System checks -command[check_load]=/usr/lib/nagios/plugins/check_load --percpu --warning=0.7,0.6,0.5 --critical=0.9,0.8,0.7 -command[check_swap]=/usr/lib/nagios/plugins/check_swap -a -w 30% -c 20% -command[check_disk1]=/usr/lib/nagios/plugins/check_disk -e -w 10% -c 3% -W 10% -K 3% -C -w 5% -c 2% -W 5% -K 2% -p /home -x /lib/init/rw -x /dev -x /dev/shm -x /run -I '^/run/' -I '^/sys/' -X overlay -command[check_zombie_procs]=sudo /usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z -command[check_total_procs]=sudo /usr/lib/nagios/plugins/check_procs -w 400 -c 600 -command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10 +# Default activated checks -# Generic services checks -command[check_smtp]=/usr/lib/nagios/plugins/check_smtp -H localhost -command[check_dns]=/usr/lib/nagios/plugins/check_dns -H evolix.net -command[check_ntp]=/usr/lib/nagios/plugins/check_ntp -H {{ 
nagios_nrpe_ntp_server or nagios_nrpe_default_ntp_server | mandatory }} -command[check_ssh]=/usr/lib/nagios/plugins/check_ssh localhost -command[check_mailq]=/usr/lib/nagios/plugins/check_mailq -M postfix -w 10 -c 20 +## System checks +command[check_disk1]=/usr/local/lib/monitoringctl/alerts_wrapper --name disk1 /usr/lib/nagios/plugins/check_disk -e -w 10% -c 3% -W 10% -K 3% -C -w 5% -c 2% -W 5% -K 2% -p /home -x /lib/init/rw -x /dev -x /dev/shm -x /run -I '^/run/' -I '^/sys/' -X overlay +command[check_load]=/usr/local/lib/monitoringctl/alerts_wrapper --name load /usr/lib/nagios/plugins/check_load --percpu --warning=0.7,0.6,0.5 --critical=0.9,0.8,0.7 +command[check_mem]=/usr/local/lib/monitoringctl/alerts_wrapper --name mem {{ nagios_plugins_directory }}/check_mem -f -C -w 20 -c 10 +command[check_pressure_cpu]=/usr/local/lib/monitoringctl/alerts_wrapper --name pressure_cpu /usr/lib/nagios/plugins/check_pressure --cpu -w 100000 -c 500000 +command[check_pressure_mem]=/usr/local/lib/monitoringctl/alerts_wrapper --name pressure_mem /usr/lib/nagios/plugins/check_pressure --mem --full -w 100000 -c 500000 +command[check_pressure_io]=/usr/local/lib/monitoringctl/alerts_wrapper --name pressure_io /usr/lib/nagios/plugins/check_pressure --io --full -w 100000 -c 500000 +command[check_swap]=/usr/local/lib/monitoringctl/alerts_wrapper --name swap /usr/lib/nagios/plugins/check_swap -a -w 30% -c 20% +command[check_total_procs]=/usr/local/lib/monitoringctl/alerts_wrapper --name total_procs sudo /usr/lib/nagios/plugins/check_procs -w 400 -c 600 +command[check_users]=/usr/local/lib/monitoringctl/alerts_wrapper --name users /usr/lib/nagios/plugins/check_users -w 5 -c 10 +command[check_zombie_procs]=/usr/local/lib/monitoringctl/alerts_wrapper --name zombie_procs sudo /usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z -# Specific services checks -command[check_pgsql]=/usr/lib/nagios/plugins/check_pgsql -H localhost -l nrpe -p '{{ nagios_nrpe_pgsql_passwd }}' 
-command[check_mysql]=/usr/lib/nagios/plugins/check_mysql -H localhost -f ~nagios/.my.cnf -command[check_mysql_slave]=/usr/lib/nagios/plugins/check_mysql --check-slave -H localhost -f ~nagios/.my.cnf -w 1800 -c 3600 -command[check_ldap]=/usr/lib/nagios/plugins/check_ldap -3 --extra-opts=@/etc/nagios/monitoring-plugins.ini -command[check_ldaps]=/usr/lib/nagios/plugins/check_ldap -3 -T --extra-opts=@/etc/nagios/monitoring-plugins.ini -command[check_imap]=/usr/lib/nagios/plugins/check_imap -H localhost -command[check_imaps]=/usr/lib/nagios/plugins/check_imap -S -H localhost -p 993 -command[check_imapproxy]=/usr/lib/nagios/plugins/check_imap -H localhost -p 1143 -command[check_pop]=/usr/lib/nagios/plugins/check_pop -H localhost -command[check_pops]=/usr/lib/nagios/plugins/check_pop -S -H localhost -p 995 -command[check_ftp]=/usr/lib/nagios/plugins/check_ftp -H localhost -command[check_ftp_users]=/usr/local/lib/nagios/plugins/check_ftp_users -w 20 -c 40 -command[check_http]=/usr/lib/nagios/plugins/check_http -e 301 -I 127.0.0.1 -H localhost -command[check_https]=/usr/lib/nagios/plugins/check_http -e 401,403 -I 127.0.0.1 -S -p 443 --sni -H ssl.evolix.net -command[check_bind]=/usr/lib/nagios/plugins/check_dig -l evolix.net -H localhost -command[check_unbound]=/usr/lib/nagios/plugins/check_dig -l evolix.net -H localhost -command[check_smb]=/usr/lib/nagios/plugins/check_tcp -H 127.0.0.1 -p 445 -command[check_tse]=/usr/lib/nagios/plugins/check_tcp -H TSEADDR -p 3389 -command[check_jboss-http]=/usr/lib/nagios/plugins/check_tcp -p 8080 -command[check_jboss-ajp13]=/usr/lib/nagios/plugins/check_tcp -p 8009 -command[check_tomcat-http]=/usr/lib/nagios/plugins/check_tcp -p 8080 -command[check_tomcat-ajp13]=/usr/lib/nagios/plugins/check_tcp -p 8009 -command[check_proxy]=/usr/lib/nagios/plugins/check_http -H {{ nagios_nrpe_check_proxy_host }} -command[check_redis]=/usr/lib/nagios/plugins/check_tcp -p 6379 -command[check_clamd]=/usr/lib/nagios/plugins/check_clamd -H 
/var/run/clamav/clamd.ctl -v -command[check_clamav_db]=/usr/lib/nagios/plugins/check_file_age -w 86400 -c 172800 -f /var/lib/clamav/daily.cld -command[check_ssl]=/usr/lib/nagios/plugins/check_http -f follow -I 127.0.0.1 -S -p 443 -H ssl.evolix.net -C 15,5 -command[check_elasticsearch]=/usr/lib/nagios/plugins/check_http -I 127.0.0.1 -u /_cat/health?h=st -p 9200 -r 'red' --invert-regex -command[check_memcached]=/usr/lib/nagios/plugins/check_tcp -H 127.0.0.1 -p 11211 -command[check_opendkim]=/usr/lib/nagios/plugins/check_tcp -H 127.0.0.1 -p 8891 -command[check_bkctld_setup]=sudo /usr/sbin/bkctld check-setup -command[check_bkctld_jails]=sudo /usr/sbin/bkctld check-jails -# "check_bkctld" is here as backward compatibility, but is replaced by "check_bkctld_jails" -command[check_bkctld]=sudo /usr/sbin/bkctld check -command[check_postgrey]=/usr/lib/nagios/plugins/check_tcp -p10023 -command[check_influxdb]=/usr/lib/nagios/plugins/check_http -I 127.0.0.1 -u /health -p 8086 -r '"status":"pass"' -command[check_dhcpd]=/usr/lib/nagios/plugins/check_procs -c1:1 -C dhcpd -t 60 -command[check_ipmi_sensors]=sudo /usr/lib/nagios/plugins/check_ipmi_sensor -command[check_raid_status]=/usr/lib/nagios/plugins/check_raid -command[check_dockerd]=/usr/lib/nagios/plugins/check_tcp -H /var/run/docker.sock --escape -s "GET /_ping HTTP/1.1\nHost: http\n\n" -e OK +## Generic services checks +command[check_dns]=/usr/local/lib/monitoringctl/alerts_wrapper --name dns /usr/lib/nagios/plugins/check_dns -H evolix.net +command[check_mailq]=/usr/local/lib/monitoringctl/alerts_wrapper --name mailq /usr/lib/nagios/plugins/check_mailq -M postfix -w 10 -c 20 +command[check_ntp]=/usr/local/lib/monitoringctl/alerts_wrapper --name ntp /usr/lib/nagios/plugins/check_ntp -H {{ nagios_nrpe_ntp_server or nagios_nrpe_default_ntp_server | mandatory }} +command[check_smtp]=/usr/local/lib/monitoringctl/alerts_wrapper --name smtp /usr/lib/nagios/plugins/check_smtp -H localhost 
+command[check_ssh]=/usr/local/lib/monitoringctl/alerts_wrapper --name ssh /usr/lib/nagios/plugins/check_ssh localhost -# Local checks (not packaged) -command[check_mem]={{ nagios_plugins_directory }}/check_mem -f -C -w 20 -c 10 -command[check_amavis]={{ nagios_plugins_directory }}/check_amavis --server 127.0.0.1 --from {{ nagios_nrpe_amavis_from }} --to postmaster@localhost --port 10024 -command[check_spamd]={{ nagios_plugins_directory }}/check_spamd -H 127.0.0.1 -command[check_nfsclient]=sudo -u www-data {{ nagios_plugins_directory }}/check_nfsclient -command[check_evobackup]={{ nagios_plugins_directory }}/check_evobackup -command[check_process]={{ nagios_plugins_directory }}/check_process {{ nagios_nrpe_processes | join(' ') }} -command[check_drbd]={{ nagios_plugins_directory }}/check_drbd -d All -c StandAlone -command[check_mongodb_connect]={{ nagios_plugins_directory }}/check_mongodb -H localhost -P27017 -A connect -command[check_glusterfs]={{ nagios_plugins_directory }}/check_glusterfs -v all -n 0 -command[check_supervisord_status]={{ nagios_plugins_directory }}/check_supervisord -command[check_varnish]={{ nagios_plugins_directory }}/check_varnish_health -i 127.0.0.1 -p 6082 -s /etc/varnish/secret -w 2 -c 4 -command[check_haproxy]=sudo {{ nagios_plugins_directory }}/check_haproxy_stats -s /run/haproxy/admin.sock -w 80 -c 90 --ignore-maint --ignore-nolb --ignore-drain -command[check_minifirewall]=sudo {{ nagios_plugins_directory }}/check_minifirewall -command[check_redis_instances]={{ nagios_plugins_directory }}/check_redis_instances -command[check_sentinel]=sudo {{ nagios_plugins_directory }}/check_sentinel -c /etc/redis/sentinel.conf -command[check_hpraid]={{ nagios_plugins_directory }}/check_hpraid -command[check_php-fpm]={{ nagios_plugins_directory }}/check_phpfpm_multi -command[check_php-fpm56]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php56/rootfs/etc/php5/fpm/pool.d/ -command[check_php-fpm70]=sudo {{ nagios_plugins_directory 
}}/check_phpfpm_multi /var/lib/lxc/php70/rootfs/etc/php/7.0/fpm/pool.d/ -command[check_php-fpm73]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php73/rootfs/etc/php/7.3/fpm/pool.d/ -command[check_php-fpm74]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php74/rootfs/etc/php/7.4/fpm/pool.d/ -command[check_php-fpm80]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php80/rootfs/etc/php/8.0/fpm/pool.d/ -command[check_php-fpm81]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php81/rootfs/etc/php/8.1/fpm/pool.d/ -command[check_php-fpm82]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php82/rootfs/etc/php/8.2/fpm/pool.d/ -command[check_php-fpm83]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php83/rootfs/etc/php/8.3/fpm/pool.d/ -command[check_dhcp_pool]={{ nagios_plugins_directory }}/check_dhcp_pool -command[check_ssl_local]={{ nagios_plugins_directory }}/check_ssl_local -command[check_pressure_cpu]=/usr/lib/nagios/plugins/check_pressure --cpu -w 100000 -c 500000 -command[check_pressure_mem]=/usr/lib/nagios/plugins/check_pressure --mem --full -w 100000 -c 500000 -command[check_pressure_io]=/usr/lib/nagios/plugins/check_pressure --io --full -w 100000 -c 500000 +## Local checks (not packaged) +command[check_minifirewall]=/usr/local/lib/monitoringctl/alerts_wrapper --name minifirewall sudo {{ nagios_plugins_directory }}/check_minifirewall + + +# Optional checks + +## Specific services checks +#command[check_pgsql]=/usr/local/lib/monitoringctl/alerts_wrapper --name pgsql /usr/lib/nagios/plugins/check_pgsql -H localhost -l nrpe -p '{{ nagios_nrpe_pgsql_passwd }}' +#command[check_mysql]=/usr/local/lib/monitoringctl/alerts_wrapper --name mysql /usr/lib/nagios/plugins/check_mysql -H localhost -f ~nagios/.my.cnf +#command[check_mysql_slave]=/usr/local/lib/monitoringctl/alerts_wrapper --name mysql_slave /usr/lib/nagios/plugins/check_mysql --check-slave -H
localhost -f ~nagios/.my.cnf -w 1800 -c 3600 +#command[check_ldap]=/usr/local/lib/monitoringctl/alerts_wrapper --name ldap /usr/lib/nagios/plugins/check_ldap -3 --extra-opts=@/etc/nagios/monitoring-plugins.ini +#command[check_ldaps]=/usr/local/lib/monitoringctl/alerts_wrapper --name ldaps /usr/lib/nagios/plugins/check_ldap -3 -T --extra-opts=@/etc/nagios/monitoring-plugins.ini +#command[check_imap]=/usr/local/lib/monitoringctl/alerts_wrapper --name imap /usr/lib/nagios/plugins/check_imap -H localhost +#command[check_imaps]=/usr/local/lib/monitoringctl/alerts_wrapper --name imaps /usr/lib/nagios/plugins/check_imap -S -H localhost -p 993 +#command[check_imapproxy]=/usr/local/lib/monitoringctl/alerts_wrapper --name imapproxy /usr/lib/nagios/plugins/check_imap -H localhost -p 1143 +#command[check_pop]=/usr/local/lib/monitoringctl/alerts_wrapper --name pop /usr/lib/nagios/plugins/check_pop -H localhost +#command[check_pops]=/usr/local/lib/monitoringctl/alerts_wrapper --name pops /usr/lib/nagios/plugins/check_pop -S -H localhost -p 995 +#command[check_ftp]=/usr/local/lib/monitoringctl/alerts_wrapper --name ftp /usr/lib/nagios/plugins/check_ftp -H localhost +#command[check_ftp_users]=/usr/local/lib/monitoringctl/alerts_wrapper --name ftp_users /usr/local/lib/nagios/plugins/check_ftp_users -w 20 -c 40 +#command[check_http]=/usr/local/lib/monitoringctl/alerts_wrapper --name http /usr/lib/nagios/plugins/check_http -e 301 -I 127.0.0.1 -H localhost +#command[check_https]=/usr/local/lib/monitoringctl/alerts_wrapper --name https /usr/lib/nagios/plugins/check_http -e 401,403 -I 127.0.0.1 -S -p 443 --sni -H ssl.evolix.net +#command[check_bind]=/usr/local/lib/monitoringctl/alerts_wrapper --name bind /usr/lib/nagios/plugins/check_dig -l evolix.net -H localhost +#command[check_unbound]=/usr/local/lib/monitoringctl/alerts_wrapper --name unbound /usr/lib/nagios/plugins/check_dig -l evolix.net -H localhost +#command[check_smb]=/usr/local/lib/monitoringctl/alerts_wrapper --name smb 
/usr/lib/nagios/plugins/check_tcp -H 127.0.0.1 -p 445 +#command[check_tse]=/usr/local/lib/monitoringctl/alerts_wrapper --name tse /usr/lib/nagios/plugins/check_tcp -H TSEADDR -p 3389 +#command[check_jboss-http]=/usr/local/lib/monitoringctl/alerts_wrapper --name jboss-http /usr/lib/nagios/plugins/check_tcp -p 8080 +#command[check_jboss-ajp13]=/usr/local/lib/monitoringctl/alerts_wrapper --name jboss-ajp13 /usr/lib/nagios/plugins/check_tcp -p 8009 +#command[check_tomcat-http]=/usr/local/lib/monitoringctl/alerts_wrapper --name tomcat-http /usr/lib/nagios/plugins/check_tcp -p 8080 +#command[check_tomcat-ajp13]=/usr/local/lib/monitoringctl/alerts_wrapper --name tomcat-ajp13 /usr/lib/nagios/plugins/check_tcp -p 8009 +#command[check_proxy]=/usr/local/lib/monitoringctl/alerts_wrapper --name proxy /usr/lib/nagios/plugins/check_http -H {{ nagios_nrpe_check_proxy_host }} +#command[check_redis]=/usr/local/lib/monitoringctl/alerts_wrapper --name redis /usr/lib/nagios/plugins/check_tcp -p 6379 +#command[check_clamd]=/usr/local/lib/monitoringctl/alerts_wrapper --name clamd /usr/lib/nagios/plugins/check_clamd -H /var/run/clamav/clamd.ctl -v +#command[check_clamav_db]=/usr/local/lib/monitoringctl/alerts_wrapper --name clamav_db /usr/lib/nagios/plugins/check_file_age -w 86400 -c 172800 -f /var/lib/clamav/daily.cld +#command[check_ssl]=/usr/local/lib/monitoringctl/alerts_wrapper --name ssl /usr/lib/nagios/plugins/check_http -f follow -I 127.0.0.1 -S -p 443 -H ssl.evolix.net -C 15,5 +#command[check_elasticsearch]=/usr/local/lib/monitoringctl/alerts_wrapper --name elasticsearch /usr/lib/nagios/plugins/check_http -I 127.0.0.1 -u /_cat/health?h=st -p 9200 -r 'red' --invert-regex +#command[check_memcached]=/usr/local/lib/monitoringctl/alerts_wrapper --name memcached /usr/lib/nagios/plugins/check_tcp -H 127.0.0.1 -p 11211 +#command[check_opendkim]=/usr/local/lib/monitoringctl/alerts_wrapper --name opendkim /usr/lib/nagios/plugins/check_tcp -H 127.0.0.1 -p 8891 
+#command[check_bkctld_setup]=/usr/local/lib/monitoringctl/alerts_wrapper --name bkctld_setup sudo /usr/sbin/bkctld check-setup +#command[check_bkctld_jails]=/usr/local/lib/monitoringctl/alerts_wrapper --name bkctld_jails sudo /usr/sbin/bkctld check-jails +## "check_bkctld" is here as backward compatibility, but is replaced by "check_bkctld_jails" +#command[check_bkctld]=/usr/local/lib/monitoringctl/alerts_wrapper --name bkctld sudo /usr/sbin/bkctld check +#command[check_postgrey]=/usr/local/lib/monitoringctl/alerts_wrapper --name postgrey /usr/lib/nagios/plugins/check_tcp -p10023 +#command[check_influxdb]=/usr/local/lib/monitoringctl/alerts_wrapper --name influxdb /usr/lib/nagios/plugins/check_http -I 127.0.0.1 -u /health -p 8086 -r '"status":"pass"' +#command[check_dhcpd]=/usr/local/lib/monitoringctl/alerts_wrapper --name dhcpd /usr/lib/nagios/plugins/check_procs -c1:1 -C dhcpd -t 60 +#command[check_ipmi_sensors]=/usr/local/lib/monitoringctl/alerts_wrapper --name ipmi_sensors sudo /usr/lib/nagios/plugins/check_ipmi_sensor +#command[check_raid_status]=/usr/local/lib/monitoringctl/alerts_wrapper --name raid_status /usr/lib/nagios/plugins/check_raid +#command[check_dockerd]=/usr/local/lib/monitoringctl/alerts_wrapper --name dockerd /usr/lib/nagios/plugins/check_tcp -H /var/run/docker.sock --escape -s "GET /_ping HTTP/1.1\nHost: http\n\n" -e OK + +## Local checks (not packaged) +#command[check_amavis]=/usr/local/lib/monitoringctl/alerts_wrapper --name amavis {{ nagios_plugins_directory }}/check_amavis --server 127.0.0.1 --from {{ nagios_nrpe_amavis_from }} --to postmaster@localhost --port 10024 +#command[check_spamd]=/usr/local/lib/monitoringctl/alerts_wrapper --name spamd {{ nagios_plugins_directory }}/check_spamd -H 127.0.0.1 +#command[check_nfsclient]=/usr/local/lib/monitoringctl/alerts_wrapper --name nfsclient sudo -u www-data {{ nagios_plugins_directory }}/check_nfsclient +#command[check_evobackup]=/usr/local/lib/monitoringctl/alerts_wrapper --name evobackup {{ 
nagios_plugins_directory }}/check_evobackup +#command[check_process]=/usr/local/lib/monitoringctl/alerts_wrapper --name process {{ nagios_plugins_directory }}/check_process {{ nagios_nrpe_processes | join(' ') }} +#command[check_drbd]=/usr/local/lib/monitoringctl/alerts_wrapper --name drbd {{ nagios_plugins_directory }}/check_drbd -d All -c StandAlone +#command[check_mongodb_connect]=/usr/local/lib/monitoringctl/alerts_wrapper --name mongodb_connect {{ nagios_plugins_directory }}/check_mongodb -H localhost -P27017 -A connect +#command[check_glusterfs]=/usr/local/lib/monitoringctl/alerts_wrapper --name glusterfs {{ nagios_plugins_directory }}/check_glusterfs -v all -n 0 +#command[check_supervisord_status]=/usr/local/lib/monitoringctl/alerts_wrapper --name supervisord_status {{ nagios_plugins_directory }}/check_supervisord +#command[check_varnish]=/usr/local/lib/monitoringctl/alerts_wrapper --name varnish {{ nagios_plugins_directory }}/check_varnish_health -i 127.0.0.1 -p 6082 -s /etc/varnish/secret -w 2 -c 4 +#command[check_haproxy]=/usr/local/lib/monitoringctl/alerts_wrapper --name haproxy sudo {{ nagios_plugins_directory }}/check_haproxy_stats -s /run/haproxy/admin.sock -w 80 -c 90 --ignore-maint --ignore-nolb --ignore-drain +#command[check_redis_instances]=/usr/local/lib/monitoringctl/alerts_wrapper --name redis_instances {{ nagios_plugins_directory }}/check_redis_instances +#command[check_sentinel]=/usr/local/lib/monitoringctl/alerts_wrapper --name sentinel sudo {{ nagios_plugins_directory }}/check_sentinel -c /etc/redis/sentinel.conf +#command[check_hpraid]=/usr/local/lib/monitoringctl/alerts_wrapper --name hpraid {{ nagios_plugins_directory }}/check_hpraid +#command[check_php-fpm]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm {{ nagios_plugins_directory }}/check_phpfpm_multi +#command[check_php-fpm56]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm56 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi 
/var/lib/lxc/php56/rootfs/etc/php5/fpm/pool.d/ +#command[check_php-fpm70]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm70 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php70/rootfs/etc/php/7.0/fpm/pool.d/ +#command[check_php-fpm73]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm73 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php73/rootfs/etc/php/7.3/fpm/pool.d/ +#command[check_php-fpm74]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm74 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php74/rootfs/etc/php/7.4/fpm/pool.d/ +#command[check_php-fpm80]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm80 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php80/rootfs/etc/php/8.0/fpm/pool.d/ +#command[check_php-fpm81]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm81 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php81/rootfs/etc/php/8.1/fpm/pool.d/ +#command[check_php-fpm82]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm82 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php82/rootfs/etc/php/8.2/fpm/pool.d/ +#command[check_php-fpm83]=/usr/local/lib/monitoringctl/alerts_wrapper --name php-fpm83 sudo {{ nagios_plugins_directory }}/check_phpfpm_multi /var/lib/lxc/php83/rootfs/etc/php/8.3/fpm/pool.d/ +#command[check_dhcp_pool]=/usr/local/lib/monitoringctl/alerts_wrapper --name dhcp_pool {{ nagios_plugins_directory }}/check_dhcp_pool +#command[check_ssl_local]=/usr/local/lib/monitoringctl/alerts_wrapper --name ssl_local {{ nagios_plugins_directory }}/check_ssl_local # Check HTTP "many". Use this to check many websites (http, https, ports, sockets and SSL certificates). # Beware! All checks must not take more than 10s! 
-#command[check_https]={{ nagios_plugins_directory }}/check_http_many +#command[check_https]=/usr/local/lib/monitoringctl/alerts_wrapper --name https {{ nagios_plugins_directory }}/check_http_many diff --git a/openvpn/tasks/debian.yml b/openvpn/tasks/debian.yml index 173299b4..8e57709c 100644 --- a/openvpn/tasks/debian.yml +++ b/openvpn/tasks/debian.yml @@ -201,7 +201,7 @@ ansible.builtin.lineinfile: dest: "/etc/nagios/nrpe.d/evolix.cfg" regexp: '^command\[check_openvpn\]=' - line: "command[check_openvpn]=/usr/local/lib/nagios/plugins/check_openvpn -H 127.0.0.1 -p 1195 -P {{ management_pwd }}" + line: "command[check_openvpn]=/usr/local/lib/monitoringctl/alerts_wrapper --name openvpn /usr/local/lib/nagios/plugins/check_openvpn -H 127.0.0.1 -p 1195 -P {{ management_pwd }}" notify: restart nagios-nrpe-server when: nrpe_evolix_config.stat.exists @@ -233,7 +233,7 @@ ansible.builtin.lineinfile: dest: "/etc/nagios/nrpe.d/evolix.cfg" regexp: '^command\[check_openvpn_certificates\]=' - line: "command[check_openvpn_certificates]=sudo /usr/local/lib/nagios/plugins/check_openvpn_certificates.sh" + line: "command[check_openvpn_certificates]=/usr/local/lib/monitoringctl/alerts_wrapper --name openvpn_certificates sudo /usr/local/lib/nagios/plugins/check_openvpn_certificates.sh" notify: restart nagios-nrpe-server when: nrpe_evolix_config.stat.exists diff --git a/postgresql/tasks/nrpe.yml b/postgresql/tasks/nrpe.yml index a78c249b..bf6dcb7d 100644 --- a/postgresql/tasks/nrpe.yml +++ b/postgresql/tasks/nrpe.yml @@ -43,7 +43,7 @@ ansible.builtin.lineinfile: name: /etc/nagios/nrpe.d/evolix.cfg regexp: '^command\[check_pgsql\]=' - line: 'command[check_pgsql]=/usr/lib/nagios/plugins/check_pgsql -H localhost -l nrpe -p "{{ postgresql_nrpe_password.stdout }}"' + line: 'command[check_pgsql]=/usr/local/lib/monitoringctl/alerts_wrapper --name pgsql /usr/lib/nagios/plugins/check_pgsql -H localhost -l nrpe -p "{{ postgresql_nrpe_password.stdout }}"' notify: restart nagios-nrpe-server when: 
postgresql_create_nrpe_user is changed when: nrpe_evolix_config.stat.exists diff --git a/rabbitmq/tasks/nrpe.yml b/rabbitmq/tasks/nrpe.yml index d181b07c..7a6484b0 100644 --- a/rabbitmq/tasks/nrpe.yml +++ b/rabbitmq/tasks/nrpe.yml @@ -40,7 +40,7 @@ ansible.builtin.lineinfile: dest: /etc/nagios/nrpe.d/evolix.cfg regexp: 'command\[check_rab_connection_count\]' - line: 'command[check_rab_connection_count]=sudo /usr/local/lib/nagios/plugins/check_rabbitmq -a connection_count -C {{ rabbitmq_connections_critical }} -W {{ rabbitmq_connections_warning }}' + line: 'command[check_rab_connection_count]=/usr/local/lib/monitoringctl/alerts_wrapper --name rab_connection_count sudo /usr/local/lib/nagios/plugins/check_rabbitmq -a connection_count -C {{ rabbitmq_connections_critical }} -W {{ rabbitmq_connections_warning }}' notify: restart nagios-nrpe-server - name: sudo without password for nagios diff --git a/redis/tasks/nrpe.yml b/redis/tasks/nrpe.yml index a786c78f..fe963069 100644 --- a/redis/tasks/nrpe.yml +++ b/redis/tasks/nrpe.yml @@ -60,7 +60,7 @@ ansible.builtin.replace: dest: /etc/nagios/nrpe.d/evolix.cfg regexp: '^command\[check_redis\]=.+' - replace: 'command[check_redis]=sudo {{ redis_check_redis_path }} -H {{ redis_bind_interfaces | first }} -p {{ redis_port }}' + replace: 'command[check_redis]=/usr/local/lib/monitoringctl/alerts_wrapper --name redis sudo {{ redis_check_redis_path }} -H {{ redis_bind_interfaces | first }} -p {{ redis_port }}' when: redis_instance_name is undefined notify: restart nagios-nrpe-server tags: @@ -99,7 +99,7 @@ ansible.builtin.replace: dest: /etc/nagios/nrpe.d/evolix.cfg regexp: '^command\[check_redis\]=.+' - replace: 'command[check_redis]=sudo /usr/local/lib/nagios/plugins/check_redis_instances' + replace: 'command[check_redis]=/usr/local/lib/monitoringctl/alerts_wrapper --name redis sudo /usr/local/lib/nagios/plugins/check_redis_instances' when: redis_instance_name is defined notify: restart nagios-nrpe-server tags: