From 073f2b5b09f7d80c2c33a9b7d269b7618259fcd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Thu, 28 May 2020 13:01:50 +0900 Subject: [PATCH 01/19] nqgios-nrpe: Add check_hpraid --- nagios-nrpe/files/plugins/check_hpraid | 266 +++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 nagios-nrpe/files/plugins/check_hpraid diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid new file mode 100644 index 00000000..3bd285cb --- /dev/null +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -0,0 +1,266 @@ +#!/usr/bin/env bash +set -euo pipefail + +# This check_hpraid is a fork from check_cciss v0.15 written by Simone Rosa. +# Upstream now at: +# https://gitea.evolix.org/evolix/ansible-roles/src/branch/stable/nagios-nrpe/files/plugins +# Source of the fork: +# https://exchange.nagios.org/directory/Plugins/Hardware/Storage-Systems/RAID-Controllers/check_cciss--2D-HP-and-Compaq-Smart-Array-Hardware-status/details +# +# Description: +# +# This plugin checks hardware status for Smart Array Controllers, +# using HPE Smart Storage Administrator. It should support Debian 9 and over. +# (Array, controller, cache, battery, etc...) +# +# NOTE: +# +# You need to install the proprietary tool HPE Smart Storage Administrator (ssacli) from: +# https://downloads.linux.hpe.com/SDR/repo/mcp +# Also NRPE need to launch ssacli as root. 
+# +# Please add this line to /etc/sudoers : +# -------------------------------------------------- +# nagios ALL=NOPASSWD: /usr/sbin/ssacli +# +# Examples: +# +# ./check_cciss +# ---------------- +# RAID OK +# +# ./check_cciss -v +# ------------------- +# RAID OK: Smart Array 6i in Slot 0 array A logicaldrive 1 (67.8 GB, RAID 1+0, OK) +# [Controller Status: OK Cache Status: OK Battery Status: OK] +# +# RAID CRITICAL - HP Smart Array Failed: Smart Array 6i in Slot 0 (Embedded) \ +# array A logicaldrive 1 (33.9 GB, RAID 1, Interim Recovery Mode) \ +# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, --- GB, Failed) +# +# RAID WARNING - HP Smart Array Rebuilding: Smart Array 6i in Slot 0 (Embedded) \ +# array A logicaldrive 1 (33.9 GB, RAID 1, Recovering, 26% complete) \ +# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, 36.4 GB, Rebuilding) +# +# ./check_cciss -v -p +# -------------------- +# RAID OK: Smart Array 6i in Slot 0 (Embedded) array A logicaldrive 1 (33.9 GB, RAID 1, OK) +# physicaldrive 2:0 (port 2:id 0 , Parallel SCSI, 36.4 GB, OK) +# physicaldrive 2:1 (port 2:id 1 , Parallel SCSI, 36.4 GB, OK) +# physicaldrive 1:5 (port 1:id 5 , Parallel SCSI, 72.8 GB, OK, spare) +# [Controller Status: OK Cache Status: OK Battery/Capacitor Status: OK] +# +# RAID CRITICAL - HP Smart Array Failed: Smart Array 6i in Slot 0 (Embedded) \ +# array A logicaldrive 1 (33.9 GB, RAID 1, Interim Recovery Mode) \ +# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, --- GB, Failed) \ +# physicaldrive 1:1 (port 1:id 1 , Parallel SCSI, 36.4 GB, OK) +# +# RAID WARNING - HP Smart Array Rebuilding: Smart Array 6i in Slot 0 (Embedded) \ +# array A logicaldrive 1 (33.9 GB, RAID 1, Recovering, 26% complete) \ +# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, 36.4 GB, Rebuilding) \ +# physicaldrive 1:1 (port 1:id 1 , Parallel SCSI, 36.4 GB, OK) +# +# ./check_cciss -v -b +# ---------------- +# +# RAID OK: Smart Array 6i in Slot 0 (Embedded) array A logicaldrive 1 (33.9 GB, RAID 1, OK) [Controller 
Status: OK] +# +# [insted of] +# RAID CRITICAL - HP Smart Array Failed: Smart Array 6i in Slot 0 (Embedded) \ +# Controller Status: OK Cache Status: Temporarily Disabled \ +# Battery/Capacitor Status: Failed (Replace Batteries/Capacitors) + +PROGNAME=$(basename $0) +NAGIOS_PLUGINS="/usr/lib/nagios/plugins/" +REVISION=$(echo '0.16-evolix') +DEBUG="0" +VERBOSE="0" +ssacli=$(command -v ssacli) +PHYSICAL_DRIVE=0 + +. ${NAGIOS_PLUGINS}/utils.sh + +print_usage() { + echo "" + echo "Usage: $PROGNAME [-v] [-p] [-e ] [-E ] [-b] [-s] [-d]" + echo "Usage: $PROGNAME [-h]" + echo "Usage: $PROGNAME [-V]" + echo "" + echo " -v = show status and informations about RAID" + echo " -p = show detail for physical drives" + echo " -e = exclude slot number" + echo " -b = exclude battery/capacitor/cache status check" + echo " -d = use for debug (command line mode)" + echo " -h = help information" + echo " -V = version information" + echo "" + echo " =============" +} + +print_help() { + print_revision $PROGNAME $REVISION + echo "" + print_usage + echo "" + echo "This plugin checks hardware status for Smart Array Controllers," + echo "using HPE Smart Storage Administrator." + echo "" + support + exit 0 +} + +while getopts "N:cvpbsde:Vh" options +do + case $options in + N) ;; + c) ;; + v) VERBOSE=1;; + p) PHYSICAL_DRIVE=1;; + d) DEBUG=1;; + e) EXCLUDE_SLOT=1 + excludeslot="$OPTARG";; + b) EXCLUDE_BATTERY=1;; + V) print_revision $PROGNAME $REVISION + exit 0;; + h) print_help + exit 0;; + \?) print_usage + exit 0;; + *) print_usage + exit 0;; + esac +done + +# Use smartpqi driver +# https://manpages.debian.org/buster/manpages/smartpqi.4.en.html +if [ -d /sys/bus/pci/drivers/smartpqi ]; then + DRIVER="/sys/bus/pci/drivers/smartpqi" + driverPresent='YES.' +else + driverPresent='No!' +fi +if [ "$DEBUG" = "1" ]; then + echo "### Check if \"HP Smart Array\" ($DRIVER) is present >>>\n"${driverPresent}"\n" +fi +if [[ "$driverPresent" == "No!" 
]]; then + echo "RAID UNKNOWN - HP Smart Array not found" + exit $STATE_UNKNOWN +fi + +# Check if "HP Array Utility CLI" is present +if [ "$DEBUG" = "1" ]; then + echo "### Check if \"ssacli\" is present >>>\n" +fi +if [ ! -x $ssacli ]; then + if [ -x $ssacli ]; then + if [ "$DEBUG" = "1" ]; then + echo "### \"ssacli\" is present >>>\n" + fi + else + echo "ERROR: ssacli tools should be installed and with right sudoers/permissions (see the notes above)" + exit $STATE_UNKNOWN + fi +fi + +# Check if "HP Controller" work correctly +check=$(sudo -u root $ssacli controller all show status 2>&1) +status=$? +if [ "$DEBUG" = "1" ]; then + echo "### Check if \"HP Controller\" work correctly >>>\n"${check}"\n" +fi +if test ${status} -ne 0; then + echo "RAID UNKNOWN - $ssacli did not execute properly : "${check} + exit $STATE_UNKNOWN +fi + +# Get "Slot" & exclude slot needed +EXCLUDE_SLOT=${EXCLUDE_SLOT:-0} +if [ "$EXCLUDE_SLOT" = "1" ]; then + slots=$(echo ${check} | egrep -o "Slot \w" | awk '{print $NF}' | grep -v "$excludeslot") +else + slots=$(echo ${check} | egrep -o "Slot \w" | awk '{print $NF}') +fi +if [ "$DEBUG" = "1" ]; then + echo "### Get \"Slot\" & exclude slot not needed >>>\n"${slots}"\n" +fi +for slot in $slots; do + # Get "logicaldrive" for slot + check2b=$(sudo -u root $ssacli controller slot=$slot logicaldrive all show 2>&1) + status=$? 
+ if test ${status} -ne 0; then + echo "RAID UNKNOWN - $ssacli did not execute properly : "${check2b} + exit $STATE_UNKNOWN + fi + check2=${check2:-} + check2="$check2$check2b" + if [ "$DEBUG" = "1" ]; then + echo "### Get \"logicaldrive\" for slot >>>\n"${check2b}"\n" + fi + + # Get "physicaldrive" for slot + if [ "$PHYSICAL_DRIVE" = "1" -o "$DEBUG" = "1" ]; then + check2b=$(sudo -u root $ssacli controller slot=$slot physicaldrive all show | sed -e 's/\?/\-/g' 2>&1 | grep "physicaldrive") + else + check2b=$(sudo -u root $ssacli controller slot=$slot physicaldrive all show | sed -e 's/\?/\-/g' 2>&1 | grep "physicaldrive" | (grep "\(Failure\|Failed\|Rebuilding\)" || true)) + fi + status=$? + if [ "$PHYSICAL_DRIVE" = "1" -o "$DEBUG" = "1" ]; then + if test ${status} -ne 0; then + echo "RAID UNKNOWN - $ssacli did not execute properly : "${check2b} + exit $STATE_UNKNOWN + fi + fi + check2="$check2$check2b" + if [ "$DEBUG" = "1" ]; then + echo "### Get \"physicaldrive\" for slot >>>\n"${check2b}"\n" + fi +done + +# Check STATUS +if [ "$DEBUG" = "1" ]; then + echo "### Check STATUS >>>" +fi + +# Omit battery/capacitor/cache status check if requested +EXCLUDE_BATTERY=${EXCLUDE_BATTERY:-0} +if [ "$EXCLUDE_BATTERY" = "1" ]; then + check=$(echo "$check" | grep -v 'Battery/Capacitor Status: Failed (Replace Batteries/Capacitors)') + check=$(echo "$check" | grep -v 'Cache Status: Temporarily Disabled') +fi + +if echo ${check} | egrep Failed >/dev/null; then + echo "RAID CRITICAL - HP Smart Array Failed: "${check} | egrep Failed + exit $STATE_CRITICAL +elif echo ${check} | egrep Disabled >/dev/null; then + echo "RAID CRITICAL - HP Smart Array Problem: "${check} | egrep Disabled + exit $STATE_CRITICAL +elif echo ${check2} | egrep Failed >/dev/null; then + echo "RAID CRITICAL - HP Smart Array Failed: "${check2} | egrep Failed + exit $STATE_CRITICAL +elif echo ${check2} | egrep Failure >/dev/null; then + echo "RAID WARNING - Component Failure: "${check2} | egrep Failure + exit 
$STATE_WARNING +elif echo ${check2} | egrep Rebuild >/dev/null; then + echo "RAID WARNING - HP Smart Array Rebuilding: "${check2} | egrep Rebuild + exit $STATE_WARNING +elif echo ${check2} | egrep Recover >/dev/null; then + echo "RAID WARNING - HP Smart Array Recovering: "${check2} | egrep Recover + exit $STATE_WARNING +elif echo ${check} | egrep "Cache Status: Temporarily Disabled" >/dev/null; then + echo "RAID WARNING - HP Smart Array Cache Disabled: "${check} + exit $STATE_WARNING +elif echo ${check} | egrep FIRMWARE >/dev/null; then + echo "RAID WARNING - "${check} + exit $STATE_WARNING +else + if [ "$DEBUG" = "1" -o "$VERBOSE" = "1" ]; then + check3=$(echo "${check}" | egrep Status) + check3=$(echo ${check3}) + echo "RAID OK: "${check2}" ["${check3}"]" + else + echo "RAID OK" + fi + exit $STATE_OK +fi + +exit $STATE_UNKNOWN From 0307c0b0666139b87f0e0d1a999a00d717720fbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Thu, 28 May 2020 15:33:00 +0900 Subject: [PATCH 02/19] nagios-nrpe: Adding licence GPLv2 to check_hpraid Also describe what has been removed/changed from the original source. --- nagios-nrpe/files/plugins/check_hpraid | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index 3bd285cb..b5514a87 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -2,11 +2,13 @@ set -euo pipefail # This check_hpraid is a fork from check_cciss v0.15 written by Simone Rosa. +# Usage of old tools and drivers were removed to use only the smartpqi driver and the ssacli tool from HP. 
# Upstream now at: # https://gitea.evolix.org/evolix/ansible-roles/src/branch/stable/nagios-nrpe/files/plugins # Source of the fork: # https://exchange.nagios.org/directory/Plugins/Hardware/Storage-Systems/RAID-Controllers/check_cciss--2D-HP-and-Compaq-Smart-Array-Hardware-status/details -# +# +# Licence: GPLv2 # Description: # # This plugin checks hardware status for Smart Array Controllers, From f35cbdbe3046783b06d5ec720441c149c60a0676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Thu, 28 May 2020 16:35:11 +0900 Subject: [PATCH 03/19] nagios-nrpe: shellchecked check_hp Also refactored the checking part and outputing. --- nagios-nrpe/files/plugins/check_hpraid | 125 +++++++++++++------------ 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index b5514a87..1bac12e6 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -1,9 +1,13 @@ #!/usr/bin/env bash +# shellcheck disable=SC2028 set -euo pipefail # This check_hpraid is a fork from check_cciss v0.15 written by Simone Rosa. +# Fork written by Evolix and for Evolix usage (Debian only). # Usage of old tools and drivers were removed to use only the smartpqi driver and the ssacli tool from HP. -# Upstream now at: +# Tools not used on Debian were also removed. +# Linting tool shellcheck was used to use a better bash coding style. 
+# Upstream at: # https://gitea.evolix.org/evolix/ansible-roles/src/branch/stable/nagios-nrpe/files/plugins # Source of the fork: # https://exchange.nagios.org/directory/Plugins/Hardware/Storage-Systems/RAID-Controllers/check_cciss--2D-HP-and-Compaq-Smart-Array-Hardware-status/details @@ -72,14 +76,15 @@ set -euo pipefail # Controller Status: OK Cache Status: Temporarily Disabled \ # Battery/Capacitor Status: Failed (Replace Batteries/Capacitors) -PROGNAME=$(basename $0) +PROGNAME=$(basename "$0") NAGIOS_PLUGINS="/usr/lib/nagios/plugins/" -REVISION=$(echo '0.16-evolix') +REVISION="0.16-evolix" DEBUG="0" VERBOSE="0" ssacli=$(command -v ssacli) PHYSICAL_DRIVE=0 +# shellcheck source=/dev/null . ${NAGIOS_PLUGINS}/utils.sh print_usage() { @@ -100,7 +105,7 @@ print_usage() { } print_help() { - print_revision $PROGNAME $REVISION + print_revision "$PROGNAME" "$REVISION" echo "" print_usage echo "" @@ -122,7 +127,7 @@ do e) EXCLUDE_SLOT=1 excludeslot="$OPTARG";; b) EXCLUDE_BATTERY=1;; - V) print_revision $PROGNAME $REVISION + V) print_revision "$PROGNAME" "$REVISION" exit 0;; h) print_help exit 0;; @@ -142,79 +147,80 @@ else driverPresent='No!' fi if [ "$DEBUG" = "1" ]; then - echo "### Check if \"HP Smart Array\" ($DRIVER) is present >>>\n"${driverPresent}"\n" + echo "### Check if \"HP Smart Array\" ($DRIVER) is present >>>\n${driverPresent}\n" fi if [[ "$driverPresent" == "No!" ]]; then echo "RAID UNKNOWN - HP Smart Array not found" - exit $STATE_UNKNOWN + exit "$STATE_UNKNOWN" fi # Check if "HP Array Utility CLI" is present if [ "$DEBUG" = "1" ]; then echo "### Check if \"ssacli\" is present >>>\n" fi -if [ ! -x $ssacli ]; then - if [ -x $ssacli ]; then +if [ ! 
-x "$ssacli" ]; then + if [ -x "$ssacli" ]; then if [ "$DEBUG" = "1" ]; then echo "### \"ssacli\" is present >>>\n" fi else echo "ERROR: ssacli tools should be installed and with right sudoers/permissions (see the notes above)" - exit $STATE_UNKNOWN + exit "$STATE_UNKNOWN" fi fi # Check if "HP Controller" work correctly -check=$(sudo -u root $ssacli controller all show status 2>&1) +check=$(sudo -u root "$ssacli" controller all show status 2>&1) status=$? if [ "$DEBUG" = "1" ]; then - echo "### Check if \"HP Controller\" work correctly >>>\n"${check}"\n" + echo "### Check if \"HP Controller\" work correctly >>>\n""${check}""\n" fi if test ${status} -ne 0; then - echo "RAID UNKNOWN - $ssacli did not execute properly : "${check} - exit $STATE_UNKNOWN + echo "RAID UNKNOWN - $ssacli did not execute properly : ""${check}" + exit "$STATE_UNKNOWN" fi # Get "Slot" & exclude slot needed EXCLUDE_SLOT=${EXCLUDE_SLOT:-0} if [ "$EXCLUDE_SLOT" = "1" ]; then - slots=$(echo ${check} | egrep -o "Slot \w" | awk '{print $NF}' | grep -v "$excludeslot") + slots=$(grep -E -o "Slot \w" <<< "$check" | awk '{print $NF}' | grep -v "$excludeslot") else - slots=$(echo ${check} | egrep -o "Slot \w" | awk '{print $NF}') + slots=$(grep -E -o "Slot \w" <<< "$check" | awk '{print $NF}') fi if [ "$DEBUG" = "1" ]; then - echo "### Get \"Slot\" & exclude slot not needed >>>\n"${slots}"\n" + echo "### Get \"Slot\" & exclude slot not needed >>>\n""${slots}""\n" fi + for slot in $slots; do # Get "logicaldrive" for slot - check2b=$(sudo -u root $ssacli controller slot=$slot logicaldrive all show 2>&1) + check2b=$(sudo -u root "$ssacli" controller slot="$slot" logicaldrive all show 2>&1) status=$? 
if test ${status} -ne 0; then - echo "RAID UNKNOWN - $ssacli did not execute properly : "${check2b} - exit $STATE_UNKNOWN + echo "RAID UNKNOWN - $ssacli did not execute properly : ""${check2b}" + exit "$STATE_UNKNOWN" fi check2=${check2:-} check2="$check2$check2b" if [ "$DEBUG" = "1" ]; then - echo "### Get \"logicaldrive\" for slot >>>\n"${check2b}"\n" + echo "### Get \"logicaldrive\" for slot >>>\n""${check2b}""\n" fi # Get "physicaldrive" for slot - if [ "$PHYSICAL_DRIVE" = "1" -o "$DEBUG" = "1" ]; then - check2b=$(sudo -u root $ssacli controller slot=$slot physicaldrive all show | sed -e 's/\?/\-/g' 2>&1 | grep "physicaldrive") + if [ "$PHYSICAL_DRIVE" = "1" ] || [ "$DEBUG" = "1" ]; then + check2b=$(sudo -u root "$ssacli" controller slot="$slot" physicaldrive all show | sed -e 's/\?/\-/g' 2>&1 | grep "physicaldrive") else - check2b=$(sudo -u root $ssacli controller slot=$slot physicaldrive all show | sed -e 's/\?/\-/g' 2>&1 | grep "physicaldrive" | (grep "\(Failure\|Failed\|Rebuilding\)" || true)) + check2b=$(sudo -u root "$ssacli" controller slot="$slot" physicaldrive all show | sed -e 's/\?/\-/g' 2>&1 | grep "physicaldrive" | (grep "\(Failure\|Failed\|Rebuilding\)" || true)) fi status=$? 
- if [ "$PHYSICAL_DRIVE" = "1" -o "$DEBUG" = "1" ]; then + if [ "$PHYSICAL_DRIVE" = "1" ] || [ "$DEBUG" = "1" ]; then if test ${status} -ne 0; then - echo "RAID UNKNOWN - $ssacli did not execute properly : "${check2b} - exit $STATE_UNKNOWN + echo "RAID UNKNOWN - $ssacli did not execute properly : ""${check2b}" + exit "$STATE_UNKNOWN" fi fi check2="$check2$check2b" if [ "$DEBUG" = "1" ]; then - echo "### Get \"physicaldrive\" for slot >>>\n"${check2b}"\n" + echo "### Get \"physicaldrive\" for slot >>>\n""${check2b}""\n" fi done @@ -226,43 +232,42 @@ fi # Omit battery/capacitor/cache status check if requested EXCLUDE_BATTERY=${EXCLUDE_BATTERY:-0} if [ "$EXCLUDE_BATTERY" = "1" ]; then - check=$(echo "$check" | grep -v 'Battery/Capacitor Status: Failed (Replace Batteries/Capacitors)') - check=$(echo "$check" | grep -v 'Cache Status: Temporarily Disabled') + check=$(grep -v 'Battery/Capacitor Status: Failed (Replace Batteries/Capacitors)' "$check") + check=$(grep -v 'Cache Status: Temporarily Disabled' "$check") fi -if echo ${check} | egrep Failed >/dev/null; then - echo "RAID CRITICAL - HP Smart Array Failed: "${check} | egrep Failed - exit $STATE_CRITICAL -elif echo ${check} | egrep Disabled >/dev/null; then - echo "RAID CRITICAL - HP Smart Array Problem: "${check} | egrep Disabled - exit $STATE_CRITICAL -elif echo ${check2} | egrep Failed >/dev/null; then - echo "RAID CRITICAL - HP Smart Array Failed: "${check2} | egrep Failed - exit $STATE_CRITICAL -elif echo ${check2} | egrep Failure >/dev/null; then - echo "RAID WARNING - Component Failure: "${check2} | egrep Failure - exit $STATE_WARNING -elif echo ${check2} | egrep Rebuild >/dev/null; then - echo "RAID WARNING - HP Smart Array Rebuilding: "${check2} | egrep Rebuild - exit $STATE_WARNING -elif echo ${check2} | egrep Recover >/dev/null; then - echo "RAID WARNING - HP Smart Array Recovering: "${check2} | egrep Recover - exit $STATE_WARNING -elif echo ${check} | egrep "Cache Status: Temporarily Disabled" >/dev/null; 
then - echo "RAID WARNING - HP Smart Array Cache Disabled: "${check} - exit $STATE_WARNING -elif echo ${check} | egrep FIRMWARE >/dev/null; then - echo "RAID WARNING - "${check} - exit $STATE_WARNING +if grep -qiE Failed <<< "$check"; then + echo "RAID CRITICAL - HP Smart Array Failed: ${check}" + exit "$STATE_CRITICAL" +elif grep -qiE Disabled <<< "$check"; then + echo "RAID CRITICAL - HP Smart Array Problem: ${check}" + exit "$STATE_CRITICAL" +elif grep -qiE Failed <<< "$check2"; then + echo "RAID CRITICAL - HP Smart Array Failed: ${check2}" + exit "$STATE_CRITICAL" +elif grep -qiE Failure <<< "$check2"; then + echo "RAID WARNING - Component Failure: ${check2}" + exit "$STATE_WARNING" +elif grep -qiE Rebuild <<< "$check2"; then + echo "RAID WARNING - HP Smart Array Rebuilding: ${check2}" + exit "$STATE_WARNING" +elif grep -qiE Recover <<< "$check2"; then + echo "RAID WARNING - HP Smart Array Recovering: ${check2}" + exit "$STATE_WARNING" +elif grep -qiE "Cache Status: Temporarily Disabled" <<< "$check"; then + echo "RAID WARNING - HP Smart Array Cache Disabled: ${check}" + exit "$STATE_WARNING" +elif grep -qiE FIRMWARE <<< "$check"; then + echo "RAID WARNING - ${check}" + exit "$STATE_WARNING" else - if [ "$DEBUG" = "1" -o "$VERBOSE" = "1" ]; then - check3=$(echo "${check}" | egrep Status) - check3=$(echo ${check3}) - echo "RAID OK: "${check2}" ["${check3}"]" + if [ "$DEBUG" = "1" ] || [ "$VERBOSE" = "1" ]; then + check3=$(grep -E Status <<< "$check") + echo "RAID OK: ${check2} [${check3}]" else echo "RAID OK" fi - exit $STATE_OK + exit "$STATE_OK" fi -exit $STATE_UNKNOWN +exit "$STATE_UNKNOWN" From 314cd2c1de65490fb427b0640617106afb9c76c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Fri, 29 May 2020 09:43:15 +0900 Subject: [PATCH 04/19] nagios-nrpe: Added hpsa support to check_hpraid Also handle empty slots. 
--- nagios-nrpe/files/plugins/check_hpraid | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index 1bac12e6..2dc567b4 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -4,7 +4,7 @@ set -euo pipefail # This check_hpraid is a fork from check_cciss v0.15 written by Simone Rosa. # Fork written by Evolix and for Evolix usage (Debian only). -# Usage of old tools and drivers were removed to use only the smartpqi driver and the ssacli tool from HP. +# Usage of old tools and drivers were removed to use only the smartpqi or hpsa drivers and the ssacli tool from HP. # Tools not used on Debian were also removed. # Linting tool shellcheck was used to use a better bash coding style. # Upstream at: @@ -77,7 +77,7 @@ set -euo pipefail # Battery/Capacitor Status: Failed (Replace Batteries/Capacitors) PROGNAME=$(basename "$0") -NAGIOS_PLUGINS="/usr/lib/nagios/plugins/" +NAGIOS_PLUGINS="/usr/lib/nagios/plugins" REVISION="0.16-evolix" DEBUG="0" VERBOSE="0" @@ -138,16 +138,15 @@ do esac done -# Use smartpqi driver +# Check if smartpqi or hpsa driver is loaded # https://manpages.debian.org/buster/manpages/smartpqi.4.en.html -if [ -d /sys/bus/pci/drivers/smartpqi ]; then - DRIVER="/sys/bus/pci/drivers/smartpqi" +if [ -d /sys/bus/pci/drivers/smartpqi ] || [ -d /sys/bus/pci/drivers/hpsa ]; then driverPresent='YES.' else driverPresent='No!' fi if [ "$DEBUG" = "1" ]; then - echo "### Check if \"HP Smart Array\" ($DRIVER) is present >>>\n${driverPresent}\n" + echo "### Check if \"HP Smart Array\" driver is present >>>\n${driverPresent}\n" fi if [[ "$driverPresent" == "No!" ]]; then echo "RAID UNKNOWN - HP Smart Array not found" @@ -193,12 +192,18 @@ fi for slot in $slots; do # Get "logicaldrive" for slot + set +e check2b=$(sudo -u root "$ssacli" controller slot="$slot" logicaldrive all show 2>&1) status=$? 
if test ${status} -ne 0; then + # Skip empty slots + if grep -q "The specified device does not have any logical drives." <<< "$check2b"; then + break + fi echo "RAID UNKNOWN - $ssacli did not execute properly : ""${check2b}" exit "$STATE_UNKNOWN" fi + set -e check2=${check2:-} check2="$check2$check2b" if [ "$DEBUG" = "1" ]; then From 7b97702f1587a6abdf458f445a44aae732bb812c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Thu, 4 Jun 2020 16:50:35 +0900 Subject: [PATCH 05/19] evolinux-base: Add check_hpraid.sh This script is meant to be executed as a cron by executing Nagios NRPE plugin check_hpraid and notify by mail any errors --- evolinux-base/files/check_hpraid.sh | 64 +++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 evolinux-base/files/check_hpraid.sh diff --git a/evolinux-base/files/check_hpraid.sh b/evolinux-base/files/check_hpraid.sh new file mode 100644 index 00000000..b4c8ffc0 --- /dev/null +++ b/evolinux-base/files/check_hpraid.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +# This script is meant to be executed as a cron by executing Nagios +# NRPE plugin check_hpraid and notify by mail any errors + +TMPDIR=/tmp +md5sum=$(command -v md5sum) +awk=$(command -v awk) +check_hpraid="/usr/local/lib/nagios/plugins/check_hpraid -v" +check_hpraid_output=$(mktemp -p $TMPDIR check_hpraid_XXX) +check_hpraid_last="$TMPDIR/check_hpraid_last" +trap trapFunc EXIT ERR + +testDeps() { + + test -x "$md5sum" || (echo "md5sum binary not found"; exit 1) + test -x "$awk" || (echo "awk binary not found"; exit 1) +} + +main() { + + if ! $check_hpraid > "$check_hpraid_output"; then + error=true + else + error=false + fi + if [ ! 
-f $check_hpraid_last ]; then + cp "$check_hpraid_output" $check_hpraid_last + fi + + # If output and last check is different, display differences and + # exit + md5_now=$(md5sum "$check_hpraid_output" | awk '{print $1}') + md5_last=$(md5sum $check_hpraid_last | awk '{print $1}') + if [[ "$md5_now" != "$md5_last" ]]; then + cat << EOT + Different RAID state detected. + Was: + $check_hpraid_last + Is now: + $check_hpraid_output +EOT + exit 1 + fi + + # If check_hpraid returned error, display output, save status and + # exit + if $error; then + cp "$check_hpraid_output" $check_hpraid_last + cat "$check_hpraid_output" + exit 1 + else + exit 0 + fi +} + +trapFunc() { + + rm "$check_hpraid_output" +} + +testDeps +main From 91dda2e1a2671234afabc6b8ea85d2554d63a20d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Thu, 4 Jun 2020 17:23:14 +0900 Subject: [PATCH 06/19] evolinux-base: check_hpraid.sh: Fix RAID state detection --- evolinux-base/files/check_hpraid.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/evolinux-base/files/check_hpraid.sh b/evolinux-base/files/check_hpraid.sh index b4c8ffc0..3c266ba4 100644 --- a/evolinux-base/files/check_hpraid.sh +++ b/evolinux-base/files/check_hpraid.sh @@ -37,11 +37,12 @@ main() { cat << EOT Different RAID state detected. 
Was: - $check_hpraid_last + $(cat $check_hpraid_last) Is now: - $check_hpraid_output + $(cat $check_hpraid_output) EOT - exit 1 + cp "$check_hpraid_output" $check_hpraid_last + exit 1 fi # If check_hpraid returned error, display output, save status and From 342810362da951adda13099ad0479346ae64939c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Thu, 4 Jun 2020 17:32:49 +0900 Subject: [PATCH 07/19] evolinux-base: check_hpraid.sh: Fix missing copy of RAID state --- evolinux-base/files/check_hpraid.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/evolinux-base/files/check_hpraid.sh b/evolinux-base/files/check_hpraid.sh index 3c266ba4..9c5548c2 100644 --- a/evolinux-base/files/check_hpraid.sh +++ b/evolinux-base/files/check_hpraid.sh @@ -52,6 +52,7 @@ EOT cat "$check_hpraid_output" exit 1 else + cp "$check_hpraid_output" $check_hpraid_last exit 0 fi } From 6126be95e34d5bfaeb85000a0f98a16fe803d601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 10:36:24 +0900 Subject: [PATCH 08/19] nagios-nrpe: check_hpraid: Be sure that variables are bound --- nagios-nrpe/files/plugins/check_hpraid | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index 2dc567b4..42911dd3 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -19,6 +19,13 @@ set -euo pipefail # using HPE Smart Storage Administrator. It should support Debian 9 and over. # (Array, controller, cache, battery, etc...) 
# +# Known working RAID controllers: +# +# - Adaptec Smart Storage PQI 12G SAS/PCIe 3 (rev 01) +# | Smart Array P408i-a SR Gen10 +# | Smart Array E208i-a SR Gen10 +# +# # NOTE: # # You need to install the proprietary tool HPE Smart Storage Administrator (ssacli) from: @@ -241,6 +248,9 @@ if [ "$EXCLUDE_BATTERY" = "1" ]; then check=$(grep -v 'Cache Status: Temporarily Disabled' "$check") fi +check=${check:-} +check2=${check2:-} +check3=${check3:-} if grep -qiE Failed <<< "$check"; then echo "RAID CRITICAL - HP Smart Array Failed: ${check}" exit "$STATE_CRITICAL" From d49da6954ac93d0b0be9ad1705cd5e7b38455f30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 10:53:00 +0900 Subject: [PATCH 09/19] nagios-nrpe: check_hpraid: Fix wrong command name in examples --- nagios-nrpe/files/plugins/check_hpraid | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index 42911dd3..5f140059 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -38,11 +38,11 @@ set -euo pipefail # # Examples: # -# ./check_cciss +# ./check_hpraid # ---------------- # RAID OK # -# ./check_cciss -v +# ./check_hpraid -v # ------------------- # RAID OK: Smart Array 6i in Slot 0 array A logicaldrive 1 (67.8 GB, RAID 1+0, OK) # [Controller Status: OK Cache Status: OK Battery Status: OK] @@ -55,7 +55,7 @@ set -euo pipefail # array A logicaldrive 1 (33.9 GB, RAID 1, Recovering, 26% complete) \ # physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, 36.4 GB, Rebuilding) # -# ./check_cciss -v -p +# ./check_hpraid -v -p # -------------------- # RAID OK: Smart Array 6i in Slot 0 (Embedded) array A logicaldrive 1 (33.9 GB, RAID 1, OK) # physicaldrive 2:0 (port 2:id 0 , Parallel SCSI, 36.4 GB, OK) @@ -73,7 +73,7 @@ set -euo pipefail # physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, 36.4 GB, Rebuilding) \ # physicaldrive 1:1 (port 1:id 1 , 
Parallel SCSI, 36.4 GB, OK) # -# ./check_cciss -v -b +# ./check_hpraid -v -b # ---------------- # # RAID OK: Smart Array 6i in Slot 0 (Embedded) array A logicaldrive 1 (33.9 GB, RAID 1, OK) [Controller Status: OK] From b47d2b872cd23b39f991fa2e0906abd71f18fcbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 10:57:18 +0900 Subject: [PATCH 10/19] nagios-nrpe: check_hpraid: Fixed wrong grep in EXCLUDE_BATTERY --- nagios-nrpe/files/plugins/check_hpraid | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index 5f140059..4f3a3ed3 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -244,8 +244,8 @@ fi # Omit battery/capacitor/cache status check if requested EXCLUDE_BATTERY=${EXCLUDE_BATTERY:-0} if [ "$EXCLUDE_BATTERY" = "1" ]; then - check=$(grep -v 'Battery/Capacitor Status: Failed (Replace Batteries/Capacitors)' "$check") - check=$(grep -v 'Cache Status: Temporarily Disabled' "$check") + check=$(grep -v 'Battery/Capacitor Status: Failed (Replace Batteries/Capacitors)' <<< "$check") + check=$(grep -v 'Cache Status: Temporarily Disabled' <<< "$check") fi check=${check:-} From 09e17ffe6c34f151ea7d3360b2a5f52b090b4d12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 11:16:44 +0900 Subject: [PATCH 11/19] nagios-nrpe: check_hpraid: Use printf for return lines --- nagios-nrpe/files/plugins/check_hpraid | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index 4f3a3ed3..7181bcd5 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -230,7 +230,7 @@ for slot in $slots; do exit "$STATE_UNKNOWN" fi fi - check2="$check2$check2b" + printf -v check2 "%s\n%s" "$check2" "$check2b" if [ "$DEBUG" = "1" ]; then echo "### Get 
\"physicaldrive\" for slot >>>\n""${check2b}""\n" fi @@ -278,7 +278,7 @@ elif grep -qiE FIRMWARE <<< "$check"; then else if [ "$DEBUG" = "1" ] || [ "$VERBOSE" = "1" ]; then check3=$(grep -E Status <<< "$check") - echo "RAID OK: ${check2} [${check3}]" + printf "RAID OK: %s\n%s\n" "$check2" "$check3" else echo "RAID OK" fi From 74229809ff3333f0911f1f8b3a55277d1b0b1d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 12:28:10 +0900 Subject: [PATCH 12/19] nagios-nrpe: Add check_hpraid in template --- nagios-nrpe/templates/evolix.cfg.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/nagios-nrpe/templates/evolix.cfg.j2 b/nagios-nrpe/templates/evolix.cfg.j2 index 89d7e7d9..b8c7500e 100644 --- a/nagios-nrpe/templates/evolix.cfg.j2 +++ b/nagios-nrpe/templates/evolix.cfg.j2 @@ -69,6 +69,7 @@ command[check_varnish]={{ nagios_plugins_directory }}/check_varnish_health -i 12 command[check_haproxy]=sudo {{ nagios_plugins_directory }}/check_haproxy_stats -s /run/haproxy/admin.sock -w 80 -c 90 --ignore-maint --ignore-nolb command[check_minifirewall]=sudo {{ nagios_plugins_directory }}/check_minifirewall command[check_redis_instances]={{ nagios_plugins_directory }}/check_redis_instances +command[check_hpraid]={{ nagios_plugins_directory }}/check_hpraid # Check HTTP "many". Use this to check many websites (http, https, ports, sockets and SSL certificates). # Beware! All checks must not take more than 10s! 
From 241f50d27ee1ae7cdde3255c4c4327b3e3d14c62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 12:34:48 +0900 Subject: [PATCH 13/19] nagios-nrpe: check_hpraid: Update known working RAID controllers --- nagios-nrpe/files/plugins/check_hpraid | 1 + 1 file changed, 1 insertion(+) diff --git a/nagios-nrpe/files/plugins/check_hpraid b/nagios-nrpe/files/plugins/check_hpraid index 7181bcd5..0e6a7729 100644 --- a/nagios-nrpe/files/plugins/check_hpraid +++ b/nagios-nrpe/files/plugins/check_hpraid @@ -23,6 +23,7 @@ set -euo pipefail # # - Adaptec Smart Storage PQI 12G SAS/PCIe 3 (rev 01) # | Smart Array P408i-a SR Gen10 +# | Smart Array P408i-p SR Gen10 # | Smart Array E208i-a SR Gen10 # # From 4bec21a9f330a60957f9f164d04da656e77dbd1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 12:35:56 +0900 Subject: [PATCH 14/19] evolinux-base: hardware: Support HP gen >=10 RAID controller --- evolinux-base/defaults/main.yml | 3 ++ .../{check_hpraid.sh => check_hpraid.cron.sh} | 0 evolinux-base/tasks/hardware.yml | 41 ++++++++++++++----- 3 files changed, 33 insertions(+), 11 deletions(-) rename evolinux-base/files/{check_hpraid.sh => check_hpraid.cron.sh} (100%) diff --git a/evolinux-base/defaults/main.yml b/evolinux-base/defaults/main.yml index 9681f0b7..2a85ecc5 100644 --- a/evolinux-base/defaults/main.yml +++ b/evolinux-base/defaults/main.yml @@ -214,3 +214,6 @@ evolinux_listupgrade_include: True # Generate ldif evolinux_generateldif_include: True + +# Cron check_hpraid +evolinux_cron_checkhpraid_frequency: daily diff --git a/evolinux-base/files/check_hpraid.sh b/evolinux-base/files/check_hpraid.cron.sh similarity index 100% rename from evolinux-base/files/check_hpraid.sh rename to evolinux-base/files/check_hpraid.cron.sh diff --git a/evolinux-base/tasks/hardware.yml b/evolinux-base/tasks/hardware.yml index 628cd6d1..bf7f46b6 100644 --- a/evolinux-base/tasks/hardware.yml +++ b/evolinux-base/tasks/hardware.yml @@
-25,15 +25,17 @@ when: broadcom_netextreme_search.rc == 0 ## RAID - +# Dell and others: MegaRAID SAS +# HP gen <10: Hewlett-Packard Company Smart Array +# HP gen >=10: Adaptec Smart Storage PQI - name: Detect if RAID is installed - shell: lspci | grep "RAID bus controller" | grep -v Intel + shell: lspci -q | grep -e "RAID bus controller" -e "Serial Attached SCSI controller" check_mode: no register: raidmodel changed_when: "'FAILED' in raidmodel.stdout" failed_when: "'FAILED' in raidmodel.stdout" -- name: HP Smart Array package is present +- name: HPE Smart Storage Administrator (ssacli) is present block: - name: Add HPE GPG key apt_key: @@ -44,28 +46,45 @@ apt_repository: repo: 'deb https://downloads.linux.hpe.com/SDR/repo/mcp {{ ansible_distribution_release }}/current non-free' state: present - - - name: Install packages for HP hardware + - name: Install HPE Smart Storage Administrator (ssacli) apt: - name: - - cciss-vol-status - - ssacli + name: ssacli + when: # either controller family needs ssacli (separate list items would be ANDed) + - "'Hewlett-Packard Company Smart Array' in raidmodel.stdout + or 'Adaptec Smart Storage PQI' in raidmodel.stdout" + +# NOTE: check_hpraid cron uses check_hpraid from nagios-nrpe role +# So, if nagios-nrpe role is not installed it will not work +- name: Install and configure check_hpraid cron (HP gen >=10) + block: + - name: check_hpraid cron is present (HP gen >=10) + copy: + src: check_hpraid.cron.sh + dest: /etc/cron.{{ evolinux_cron_checkhpraid_frequency | mandatory }}/check_hpraid + mode: "0755" + when: "'Adaptec Smart Storage PQI' in raidmodel.stdout" + +- name: Install and configure cciss-vol-status (HP gen <10) + block: + - name: Install cciss-vol-status (HP gen <10) + apt: + name: cciss-vol-status state: present - - name: cciss-vol-statusd init script is present + - name: cciss-vol-statusd init script is present (HP gen <10) template: src: hardware/cciss-vol-statusd.j2 dest: /etc/init.d/cciss-vol-statusd mode: "0755" - - name: Configure cciss-vol-statusd (HP
gen <10) lineinfile: dest: /etc/default/cciss-vol-statusd line: 'MAILTO="{{ raid_alert_email or general_alert_email | mandatory }}"' regexp: 'MAILTO=' create: yes - - name: Enable HP hardware in systemd + - name: Enable cciss-vol-status in systemd (HP gen <10) service: name: cciss-vol-statusd enabled: true From a74f4e18905ddf1df31b5b94808bfe5d1e7a99a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 12:42:33 +0900 Subject: [PATCH 15/19] evolinux-base/tasks/hardware.yml: Removed trailing whitespace --- evolinux-base/tasks/hardware.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evolinux-base/tasks/hardware.yml b/evolinux-base/tasks/hardware.yml index bf7f46b6..c44c1248 100644 --- a/evolinux-base/tasks/hardware.yml +++ b/evolinux-base/tasks/hardware.yml @@ -76,7 +76,7 @@ src: hardware/cciss-vol-statusd.j2 dest: /etc/init.d/cciss-vol-statusd mode: "0755" - + - name: Configure cciss-vol-statusd (HP gen <10) lineinfile: dest: /etc/default/cciss-vol-statusd From 766b4dfa826410042a20d632b1086d5324619754 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Tue, 16 Jun 2020 13:20:43 +0900 Subject: [PATCH 16/19] evolinux-base: check_hpraid cron: Add -p --- evolinux-base/files/check_hpraid.cron.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evolinux-base/files/check_hpraid.cron.sh b/evolinux-base/files/check_hpraid.cron.sh index 9c5548c2..d8330f44 100644 --- a/evolinux-base/files/check_hpraid.cron.sh +++ b/evolinux-base/files/check_hpraid.cron.sh @@ -7,7 +7,7 @@ set -euo pipefail TMPDIR=/tmp md5sum=$(command -v md5sum) awk=$(command -v awk) -check_hpraid="/usr/local/lib/nagios/plugins/check_hpraid -v" +check_hpraid="/usr/local/lib/nagios/plugins/check_hpraid -v -p" check_hpraid_output=$(mktemp -p $TMPDIR check_hpraid_XXX) check_hpraid_last="$TMPDIR/check_hpraid_last" trap trapFunc EXIT ERR From a28b9558cb03c4d15cd8c991ad0b015f0db8cad7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Beno=C3=AEt=20S?= Date: Wed, 24 Jun 2020 18:57:08 +0900 Subject: [PATCH 17/19] evolinux-base: check_hpraid.cron.sh: Better logic and use mail First step is to detect errors Second step is to detect different state Added mail command to replace cron output --- evolinux-base/files/check_hpraid.cron.sh | 66 +++++++++++++++++------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/evolinux-base/files/check_hpraid.cron.sh b/evolinux-base/files/check_hpraid.cron.sh index d8330f44..442b53ba 100644 --- a/evolinux-base/files/check_hpraid.cron.sh +++ b/evolinux-base/files/check_hpraid.cron.sh @@ -10,6 +10,18 @@ awk=$(command -v awk) check_hpraid="/usr/local/lib/nagios/plugins/check_hpraid -v -p" check_hpraid_output=$(mktemp -p $TMPDIR check_hpraid_XXX) check_hpraid_last="$TMPDIR/check_hpraid_last" +# set to false to use cron output (MAILTO) +# otherwise send output with mail command +use_mail=true +body=$(mktemp --tmpdir=/tmp check_hpraid_XXX) +clientmail=$(grep EVOMAINTMAIL /etc/evomaintenance.cf | cut -d'=' -f2) +hostname=$(grep HOSTNAME /etc/evomaintenance.cf | cut -d'=' -f2) +hostname=${hostname%%.evolix.net} +# If hostname is composed with -, remove the first part. +if [[ $hostname =~ "-" ]]; then + hostname=$(echo "$hostname" | cut -d'-' -f2-) +fi + trap trapFunc EXIT ERR testDeps() { @@ -25,6 +37,23 @@ main() { else error=false fi + + # If check_hpraid returned error, display output, save status and + # exit + if $error; then + cp "$check_hpraid_output" "$check_hpraid_last" + if $use_mail; then + mail -s "RAID error on $hostname" "$clientmail" \ + <<< "$check_hpraid_output" + else + cat "$check_hpraid_output" + fi + exit 1 + else + cp "$check_hpraid_output" $check_hpraid_last + exit 0 + fi + if [ ! 
-f $check_hpraid_last ]; then cp "$check_hpraid_output" $check_hpraid_last fi @@ -34,32 +63,31 @@ main() { md5_now=$(md5sum "$check_hpraid_output" | awk '{print $1}') md5_last=$(md5sum $check_hpraid_last | awk '{print $1}') if [[ "$md5_now" != "$md5_last" ]]; then - cat << EOT - Different RAID state detected. - Was: - $(cat $check_hpraid_last) - Is now: - $(cat $check_hpraid_output) + cat << EOT > "$body" +Different RAID state detected. + +Was: +$(sed 's/^/> /g' "$check_hpraid_last") + +########################### + +Is now: +$(sed 's/^/> /g' "$check_hpraid_output") EOT - cp "$check_hpraid_output" $check_hpraid_last + if $use_mail; then + mail -s "RAID status is different on $hostname" \ + "$clientmail" <<< "$body" + else + cat "$body" + fi + cp "$check_hpraid_output" "$check_hpraid_last" exit 1 fi - - # If check_hpraid returned error, display output, save status and - # exit - if $error; then - cp "$check_hpraid_output" $check_hpraid_last - cat "$check_hpraid_output" - exit 1 - else - cp "$check_hpraid_output" $check_hpraid_last - exit 0 - fi } trapFunc() { - rm "$check_hpraid_output" + rm "$check_hpraid_output" "$body" } testDeps From 9a8f1979bcab6bb49534bb7159bc3b5a6d266edc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Fri, 26 Jun 2020 17:57:50 +0900 Subject: [PATCH 18/19] evolinux-base: check_hpraid.cron.sh: Fixed wrong else The logic was wrong, an else part was not necessary. --- evolinux-base/files/check_hpraid.cron.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/evolinux-base/files/check_hpraid.cron.sh b/evolinux-base/files/check_hpraid.cron.sh index 442b53ba..074e88a4 100644 --- a/evolinux-base/files/check_hpraid.cron.sh +++ b/evolinux-base/files/check_hpraid.cron.sh @@ -49,9 +49,6 @@ main() { cat "$check_hpraid_output" fi exit 1 - else - cp "$check_hpraid_output" $check_hpraid_last - exit 0 fi if [ ! 
-f $check_hpraid_last ]; then From 1c050b481af4f965c7d40a8ee3c2cacb1cd6038a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20S?= Date: Wed, 1 Jul 2020 10:18:30 +0900 Subject: [PATCH 19/19] evolinux-base: check_hpraid.cron.sh: Fixed wrong `<<<` usage --- evolinux-base/files/check_hpraid.cron.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evolinux-base/files/check_hpraid.cron.sh b/evolinux-base/files/check_hpraid.cron.sh index 074e88a4..bd181716 100644 --- a/evolinux-base/files/check_hpraid.cron.sh +++ b/evolinux-base/files/check_hpraid.cron.sh @@ -44,7 +44,7 @@ main() { cp "$check_hpraid_output" "$check_hpraid_last" if $use_mail; then mail -s "RAID error on $hostname" "$clientmail" \ - <<< "$check_hpraid_output" + < "$check_hpraid_output" else cat "$check_hpraid_output" fi @@ -73,7 +73,7 @@ $(sed 's/^/> /g' "$check_hpraid_output") EOT if $use_mail; then mail -s "RAID status is different on $hostname" \ - "$clientmail" <<< "$body" + "$clientmail" < "$body" else cat "$body" fi